import quandl
import pandas as pd
import numpy as np
import datetime as dt
import pandas_profiling
from time import time
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
# import libraries here; add more as necessary
import numpy as np
import pandas as pd
from time import time
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
from sklearn.model_selection import train_test_split
from collections import defaultdict
# Import supplementary visualization code visuals.py
#import visuals as vs
from numpy import concatenate
# magic word for producing visualizations in notebook.allow plots to appear directly in the notebook
%matplotlib inline
#LSTM
# magic word for producing visualizations in notebook.allow plots to appear directly in the notebook
%matplotlib inline
from subprocess import check_output
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.layers.embeddings import Embedding
from keras.models import Sequential
from keras.layers import LSTM, CuDNNLSTM , BatchNormalization
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import time
from numpy import newaxis
#fbProphet libraries
from fbprophet import Prophet
# plt.style.available
plt.style.use("seaborn-whitegrid")
import plotly.figure_factory as ff
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import statsmodels.api as sm
from scipy import stats
Using TensorFlow backend.
# Configure the Quandl client and download every MULTPL S&P 500 series used in
# this notebook. Each result is bound to a module-level name that matches the
# dataset code after the 'MULTPL/' prefix.
# NOTE(review): the API key is hard-coded in the notebook; consider loading it
# from an environment variable or a local config file so it is not shared
# together with the code.
quandl.ApiConfig.api_key = "v5Bazu_S389s29HiutZh"
#Get data from Quandl APIs into dataframes
# The trailing "#MULTPLkeys[n]" markers give the position of that dataset code
# in the MULTPLkeys list that is defined further down.
SP500_DIV_YIELD_MONTH = quandl.get('MULTPL/SP500_DIV_YIELD_MONTH') #MULTPLkeys[0]
SP500_PE_RATIO_MONTH = quandl.get('MULTPL/SP500_PE_RATIO_MONTH')
SHILLER_PE_RATIO_MONTH = quandl.get('MULTPL/SHILLER_PE_RATIO_MONTH')
SP500_EARNINGS_YIELD_MONTH = quandl.get('MULTPL/SP500_EARNINGS_YIELD_MONTH')
SP500_INFLADJ_MONTH = quandl.get('MULTPL/SP500_INFLADJ_MONTH')#MULTPLkeys[4]
SP500_PSR_QUARTER = quandl.get('MULTPL/SP500_PSR_QUARTER')
SP500_DIV_MONTH = quandl.get('MULTPL/SP500_DIV_MONTH')
SP500_DIV_YEAR = quandl.get('MULTPL/SP500_DIV_YEAR')
SP500_DIV_GROWTH_YEAR = quandl.get('MULTPL/SP500_DIV_GROWTH_YEAR')
SP500_DIV_GROWTH_QUARTER = quandl.get('MULTPL/SP500_DIV_GROWTH_QUARTER')
SP500_PBV_RATIO_QUARTER = quandl.get('MULTPL/SP500_PBV_RATIO_QUARTER') #MULTPLkeys[10]
SHILLER_PE_RATIO_YEAR = quandl.get('MULTPL/SHILLER_PE_RATIO_YEAR')
SP500_PE_RATIO_YEAR = quandl.get('MULTPL/SP500_PE_RATIO_YEAR')
SP500_DIV_YIELD_YEAR = quandl.get('MULTPL/SP500_DIV_YIELD_YEAR')
SP500_PSR_YEAR = quandl.get('MULTPL/SP500_PSR_YEAR')
SP500_EARNINGS_YIELD_YEAR = quandl.get('MULTPL/SP500_EARNINGS_YIELD_YEAR')
SP500_PBV_RATIO_YEAR = quandl.get('MULTPL/SP500_PBV_RATIO_YEAR')
SP500_INFLADJ_YEAR = quandl.get('MULTPL/SP500_INFLADJ_YEAR')
SP500_REAL_PRICE_MONTH = quandl.get('MULTPL/SP500_REAL_PRICE_MONTH') #MULTPLkeys[18]
SP500_SALES_YEAR = quandl.get('MULTPL/SP500_SALES_YEAR')
SP500_SALES_GROWTH_YEAR = quandl.get('MULTPL/SP500_SALES_GROWTH_YEAR') #MULTPLkeys[20]
SP500_SALES_QUARTER = quandl.get('MULTPL/SP500_SALES_QUARTER')
SP500_REAL_SALES_GROWTH_QUARTER = quandl.get('MULTPL/SP500_REAL_SALES_GROWTH_QUARTER')
SP500_SALES_GROWTH_QUARTER = quandl.get('MULTPL/SP500_SALES_GROWTH_QUARTER')
SP500_REAL_SALES_GROWTH_YEAR = quandl.get('MULTPL/SP500_REAL_SALES_GROWTH_YEAR')
SP500_REAL_EARNINGS_GROWTH_YEAR = quandl.get('MULTPL/SP500_REAL_EARNINGS_GROWTH_YEAR')
SP500_REAL_SALES_YEAR = quandl.get('MULTPL/SP500_REAL_SALES_YEAR')
SP500_REAL_EARNINGS_GROWTH_QUARTER = quandl.get('MULTPL/SP500_REAL_EARNINGS_GROWTH_QUARTER')
SP500_EARNINGS_GROWTH_QUARTER = quandl.get('MULTPL/SP500_EARNINGS_GROWTH_QUARTER')
SP500_REAL_SALES_QUARTER = quandl.get('MULTPL/SP500_REAL_SALES_QUARTER')
SP500_EARNINGS_MONTH = quandl.get('MULTPL/SP500_EARNINGS_MONTH') #MULTPLkeys[30]
SP500_BVPS_YEAR = quandl.get('MULTPL/SP500_BVPS_YEAR')
SP500_EARNINGS_YEAR = quandl.get('MULTPL/SP500_EARNINGS_YEAR')
SP500_EARNINGS_GROWTH_YEAR = quandl.get('MULTPL/SP500_EARNINGS_GROWTH_YEAR')
SP500_BVPS_QUARTER = quandl.get('MULTPL/SP500_BVPS_QUARTER')
SP500_REAL_PRICE_YEAR = quandl.get('MULTPL/SP500_REAL_PRICE_YEAR') #MULTPLkeys[35]
# Quandl dataset codes ("<database>/<dataset>") for every series in this notebook.
MULTPLkeys = [
    'MULTPL/SP500_DIV_YIELD_MONTH',
    'MULTPL/SP500_PE_RATIO_MONTH',
    'MULTPL/SHILLER_PE_RATIO_MONTH',
    'MULTPL/SP500_EARNINGS_YIELD_MONTH',
    'MULTPL/SP500_INFLADJ_MONTH',
    'MULTPL/SP500_PSR_QUARTER',
    'MULTPL/SP500_DIV_MONTH',
    'MULTPL/SP500_DIV_YEAR',
    'MULTPL/SP500_DIV_GROWTH_YEAR',
    'MULTPL/SP500_DIV_GROWTH_QUARTER',
    'MULTPL/SP500_PBV_RATIO_QUARTER',
    'MULTPL/SHILLER_PE_RATIO_YEAR',
    'MULTPL/SP500_PE_RATIO_YEAR',
    'MULTPL/SP500_DIV_YIELD_YEAR',
    'MULTPL/SP500_PSR_YEAR',
    'MULTPL/SP500_EARNINGS_YIELD_YEAR',
    'MULTPL/SP500_PBV_RATIO_YEAR',
    'MULTPL/SP500_INFLADJ_YEAR',
    'MULTPL/SP500_REAL_PRICE_MONTH',
    'MULTPL/SP500_SALES_YEAR',
    'MULTPL/SP500_SALES_GROWTH_YEAR',
    'MULTPL/SP500_SALES_QUARTER',
    'MULTPL/SP500_REAL_SALES_GROWTH_QUARTER',
    'MULTPL/SP500_SALES_GROWTH_QUARTER',
    'MULTPL/SP500_REAL_SALES_GROWTH_YEAR',
    'MULTPL/SP500_REAL_EARNINGS_GROWTH_YEAR',
    'MULTPL/SP500_REAL_SALES_YEAR',
    'MULTPL/SP500_REAL_EARNINGS_GROWTH_QUARTER',
    'MULTPL/SP500_EARNINGS_GROWTH_QUARTER',
    'MULTPL/SP500_REAL_SALES_QUARTER',
    'MULTPL/SP500_EARNINGS_MONTH',
    'MULTPL/SP500_BVPS_YEAR',
    'MULTPL/SP500_EARNINGS_YEAR',
    'MULTPL/SP500_EARNINGS_GROWTH_YEAR',
    'MULTPL/SP500_BVPS_QUARTER',
    'MULTPL/SP500_REAL_PRICE_YEAR',
]
# Drop the "MULTPL/" database prefix; what is left is the dataset name, which
# doubles as the DataFrame variable name used in this notebook.
var_list = [dataset_code.partition('/')[2] for dataset_code in MULTPLkeys]
# Show the first three derived names as a quick sanity check.
print(*var_list[:3])
SP500_DIV_YIELD_MONTH SP500_PE_RATIO_MONTH SHILLER_PE_RATIO_MONTH
# def get_data(i):
# return quandl.get('MULTPL/{}'.format(i))
var_list
['SP500_DIV_YIELD_MONTH', 'SP500_PE_RATIO_MONTH', 'SHILLER_PE_RATIO_MONTH', 'SP500_EARNINGS_YIELD_MONTH', 'SP500_INFLADJ_MONTH', 'SP500_PSR_QUARTER', 'SP500_DIV_MONTH', 'SP500_DIV_YEAR', 'SP500_DIV_GROWTH_YEAR', 'SP500_DIV_GROWTH_QUARTER', 'SP500_PBV_RATIO_QUARTER', 'SHILLER_PE_RATIO_YEAR', 'SP500_PE_RATIO_YEAR', 'SP500_DIV_YIELD_YEAR', 'SP500_PSR_YEAR', 'SP500_EARNINGS_YIELD_YEAR', 'SP500_PBV_RATIO_YEAR', 'SP500_INFLADJ_YEAR', 'SP500_REAL_PRICE_MONTH', 'SP500_SALES_YEAR', 'SP500_SALES_GROWTH_YEAR', 'SP500_SALES_QUARTER', 'SP500_REAL_SALES_GROWTH_QUARTER', 'SP500_SALES_GROWTH_QUARTER', 'SP500_REAL_SALES_GROWTH_YEAR', 'SP500_REAL_EARNINGS_GROWTH_YEAR', 'SP500_REAL_SALES_YEAR', 'SP500_REAL_EARNINGS_GROWTH_QUARTER', 'SP500_EARNINGS_GROWTH_QUARTER', 'SP500_REAL_SALES_QUARTER', 'SP500_EARNINGS_MONTH', 'SP500_BVPS_YEAR', 'SP500_EARNINGS_YEAR', 'SP500_EARNINGS_GROWTH_YEAR', 'SP500_BVPS_QUARTER', 'SP500_REAL_PRICE_YEAR']
# var_list1 = [
# 'SP500_DIV_YIELD_MONTH1',
# 'SP500_PE_RATIO_MONTH2',
# 'SHILLER_PE_RATIO_MONTH']
# for i in var_list:
# #i = pd.DataFrame(get_data(i))
# exec(f'{i} = get_data(i)')
# #print(get_data(i))
# print(i)
# break
# x= 'SP500_DIV_YIELD_MONTH'
# exec("%s = %d" % (x,0))
# print(SP500_DIV_YIELD_MONTH)
# print(x)
#exec(f'{i} = get_data(i)')
# for i in range(len(var_list)//10):
# x= var_list[i]
# exec("%s = %d" % (x,0))
# #print(var_list[i])
# #print(x)
# print(SP500_DIV_YIELD_MONTH)
# # SP500_DIV_YIELD_MONTH = quandl.get(str(MULTPLkeys[0]))
# # print(SP500_DIV_YIELD_MONTH.head())
# #SP500_DIV_YIELD_MONTH
# #SP500_PE_RATIO_MONTH
# #SHILLER_PE_RATIO_MONTH
# #dynamic variables
# x= var_list[1]
# exec("%s = %s" % (x,x))
# SP500_PE_RATIO_MONTH
# Print the first rows of two of the monthly series to inspect their layout
# (index and column names) before they are joined.
print(SP500_REAL_PRICE_MONTH.head())
print(SP500_EARNINGS_YIELD_MONTH.head())
Value
Date
1871-01-01 4.44
1871-02-01 4.50
1871-03-01 4.61
1871-04-01 4.74
1871-05-01 4.86
Value
Date
1871-01-01 9.01
1871-02-01 8.89
1871-03-01 8.68
1871-04-01 8.44
1871-05-01 8.23
df = pd.merge(pd.merge(SP500_REAL_PRICE_MONTH,SP500_EARNINGS_YIELD_MONTH,on='Date'),SHILLER_PE_RATIO_MONTH,on='Date')
df.head()
Assumptions:
- An outer join is used between the price DataFrame and the monthly yield DataFrame so that all dates of every month are included.
- SP500_REAL_PRICE_MONTH is the target label, so the join starts from this DataFrame in order to include all of its rows.
# Preview the first rows of the monthly dividend-yield series.
# NOTE(review): the recorded output shows month-end dates (e.g. 1871-01-31) for
# this series, while SP500_REAL_PRICE_MONTH is printed with first-of-month
# dates (e.g. 1871-01-01); joining the two on Date will therefore not line up
# rows of the same month - confirm whether the dates should be normalised first.
SP500_DIV_YIELD_MONTH.head()
| Value | |
|---|---|
| Date | |
| 1871-01-31 | 5.86 |
| 1871-02-28 | 5.78 |
| 1871-03-31 | 5.64 |
| 1871-04-30 | 5.49 |
| 1871-05-31 | 5.35 |
#Join all month dataframes together on Date column.
# Every downloaded frame exposes its data in a single column called 'Value'.
# Combining several of them unchanged makes pandas invent '_x'/'_y' suffixes,
# which collide after a few merges (a second 'Value_x' appears); recent pandas
# releases reject such duplicate names, and assigning df.columns positionally
# afterwards silently depends on the merge order. Each frame is therefore
# renamed to 'Value_<series>' *before* it is combined, so no suffixes are needed.
def _value_as(frame, series_name):
    """Return a copy of ``frame`` whose 'Value' column is named 'Value_<series_name>'."""
    return frame.rename(columns={'Value': 'Value_' + series_name})


# Start from the target series and outer-join the dividend yield so that the
# dates of both series are kept.
df = _value_as(SP500_REAL_PRICE_MONTH, 'SP500_REAL_PRICE_MONTH').join(
    _value_as(SP500_DIV_YIELD_MONTH, 'SP500_DIV_YIELD_MONTH'),
    on=None, how='outer', sort=False)
df = df.join(_value_as(SP500_PE_RATIO_MONTH, 'SP500_PE_RATIO_MONTH'),
             on='Date', how='left', sort=False)

# Remaining series, merged in the original order with the original join type
# (only SP500_DIV_MONTH is allowed to contribute new dates).
_monthly_merges = [
    (SHILLER_PE_RATIO_MONTH, 'SHILLER_PE_RATIO_MONTH', 'left'),
    (SP500_EARNINGS_YIELD_MONTH, 'SP500_EARNINGS_YIELD_MONTH', 'left'),
    (SP500_INFLADJ_MONTH, 'SP500_INFLADJ_MONTH', 'left'),
    (SP500_PSR_QUARTER, 'SP500_PSR_QUARTER', 'left'),
    (SP500_DIV_MONTH, 'SP500_DIV_MONTH', 'outer'),
    (SP500_DIV_YEAR, 'SP500_DIV_YEAR', 'left'),
]
for _frame, _series_name, _how in _monthly_merges:
    df = df.merge(_value_as(_frame, _series_name), on='Date', how=_how)
#print(df.head(6))
print(df.columns)
print(df.shape)
Index(['Value_SP500_REAL_PRICE_MONTH', 'Value_SP500_DIV_YIELD_MONTH',
'Value_SP500_PE_RATIO_MONTH', 'Value_SHILLER_PE_RATIO_MONTH',
'Value_SP500_EARNINGS_YIELD_MONTH', 'Value_SP500_INFLADJ_MONTH',
'Value_SP500_PSR_QUARTER', 'Value_SP500_DIV_MONTH',
'Value_SP500_DIV_YEAR'],
dtype='object')
(3548, 9)
def Merge__Rename_function(df,df_var,column_name):
    """Outer-merge one series into ``df`` on ``Date`` under a descriptive column name.

    The 'Value' column of ``df_var`` is renamed to ``'Value_' + str(column_name)``
    *before* merging. Renaming first means a 'Value' column that may already
    exist in ``df`` cannot clash with the incoming one (which would otherwise
    produce 'Value_x'/'Value_y' and leave the new data without its intended name).

    Parameters:
        df: DataFrame accumulated so far; must expose ``Date`` as a column or
            index level.
        df_var: DataFrame to add; must expose ``Date`` the same way. It is not
            modified.
        column_name: label appended to 'Value_' to build the new column name.

    Returns:
        A new DataFrame holding the outer merge of ``df`` and the renamed
        ``df_var``. The shape of the result is printed as a progress trace.
    """
    new_column = 'Value_' + str(column_name)
    # rename() returns a copy, so the caller's df_var keeps its 'Value' column.
    renamed = df_var.rename(columns={'Value': new_column})
    merged = df.merge(renamed, on='Date', how='outer')
    print(merged.shape)
    return merged
var_list
['SP500_DIV_YIELD_MONTH', 'SP500_PE_RATIO_MONTH', 'SHILLER_PE_RATIO_MONTH', 'SP500_EARNINGS_YIELD_MONTH', 'SP500_INFLADJ_MONTH', 'SP500_PSR_QUARTER', 'SP500_DIV_MONTH', 'SP500_DIV_YEAR', 'SP500_DIV_GROWTH_YEAR', 'SP500_DIV_GROWTH_QUARTER', 'SP500_PBV_RATIO_QUARTER', 'SHILLER_PE_RATIO_YEAR', 'SP500_PE_RATIO_YEAR', 'SP500_DIV_YIELD_YEAR', 'SP500_PSR_YEAR', 'SP500_EARNINGS_YIELD_YEAR', 'SP500_PBV_RATIO_YEAR', 'SP500_INFLADJ_YEAR', 'SP500_REAL_PRICE_MONTH', 'SP500_SALES_YEAR', 'SP500_SALES_GROWTH_YEAR', 'SP500_SALES_QUARTER', 'SP500_REAL_SALES_GROWTH_QUARTER', 'SP500_SALES_GROWTH_QUARTER', 'SP500_REAL_SALES_GROWTH_YEAR', 'SP500_REAL_EARNINGS_GROWTH_YEAR', 'SP500_REAL_SALES_YEAR', 'SP500_REAL_EARNINGS_GROWTH_QUARTER', 'SP500_EARNINGS_GROWTH_QUARTER', 'SP500_REAL_SALES_QUARTER', 'SP500_EARNINGS_MONTH', 'SP500_BVPS_YEAR', 'SP500_EARNINGS_YEAR', 'SP500_EARNINGS_GROWTH_YEAR', 'SP500_BVPS_QUARTER', 'SP500_REAL_PRICE_YEAR']
# Fold the remaining series into df one after another. Each entry pairs a
# downloaded frame with the label used for its 'Value_<label>' column; the
# order below is the order in which the merges are applied.
_remaining_series = [
    (SP500_DIV_GROWTH_YEAR, 'SP500_DIV_GROWTH_YEAR'),
    (SP500_DIV_GROWTH_QUARTER, 'SP500_DIV_GROWTH_QUARTER'),
    (SP500_PBV_RATIO_QUARTER, 'SP500_PBV_RATIO_QUARTER'),
    (SHILLER_PE_RATIO_YEAR, 'SHILLER_PE_RATIO_YEAR'),
    (SP500_PE_RATIO_YEAR, 'SP500_PE_RATIO_YEAR'),
    (SP500_DIV_YIELD_YEAR, 'SP500_DIV_YIELD_YEAR'),
    (SP500_PSR_YEAR, 'SP500_PSR_YEAR'),
    (SP500_EARNINGS_YIELD_YEAR, 'SP500_EARNINGS_YIELD_YEAR'),
    (SP500_PBV_RATIO_YEAR, 'SP500_PBV_RATIO_YEAR'),
    (SP500_INFLADJ_YEAR, 'SP500_INFLADJ_YEAR'),
    (SP500_SALES_YEAR, 'SP500_SALES_YEAR'),
    (SP500_SALES_GROWTH_YEAR, 'SP500_SALES_GROWTH_YEAR'),
    (SP500_SALES_QUARTER, 'SP500_SALES_QUARTER'),
    (SP500_REAL_SALES_GROWTH_QUARTER, 'SP500_REAL_SALES_GROWTH_QUARTER'),
    (SP500_SALES_GROWTH_QUARTER, 'SP500_SALES_GROWTH_QUARTER'),
    (SP500_REAL_SALES_GROWTH_YEAR, 'SP500_REAL_SALES_GROWTH_YEAR'),
    (SP500_REAL_EARNINGS_GROWTH_YEAR, 'SP500_REAL_EARNINGS_GROWTH_YEAR'),
    (SP500_REAL_SALES_YEAR, 'SP500_REAL_SALES_YEAR'),
    (SP500_REAL_EARNINGS_GROWTH_QUARTER, 'SP500_REAL_EARNINGS_GROWTH_QUARTER'),
    (SP500_EARNINGS_GROWTH_QUARTER, 'SP500_EARNINGS_GROWTH_QUARTER'),
    (SP500_REAL_SALES_QUARTER, 'SP500_REAL_SALES_QUARTER'),
    (SP500_EARNINGS_MONTH, 'SP500_EARNINGS_MONTH'),
    (SP500_BVPS_YEAR, 'SP500_BVPS_YEAR'),
    (SP500_EARNINGS_YEAR, 'SP500_EARNINGS_YEAR'),
    (SP500_EARNINGS_GROWTH_YEAR, 'SP500_EARNINGS_GROWTH_YEAR'),
    (SP500_BVPS_QUARTER, 'SP500_BVPS_QUARTER'),
]
for _series_frame, _series_label in _remaining_series:
    df = Merge__Rename_function(df, _series_frame, _series_label)
# SP500_REAL_PRICE_YEAR is deliberately left out of the merged frame:
#df = Merge__Rename_function(df,SP500_REAL_PRICE_YEAR,var_list[35])
df.columns
(3553, 10) (3559, 11) (3559, 12) (3559, 13) (3559, 14) (3559, 15) (3559, 16) (3559, 17) (3559, 18) (3559, 19) (3559, 20) (3559, 21) (3559, 22) (3559, 23) (3559, 24) (3559, 25) (3559, 26) (3559, 27) (3559, 28) (3559, 29) (3559, 30) (3559, 31) (3559, 32) (3559, 33) (3559, 34) (3559, 35)
Index(['Value_SP500_REAL_PRICE_MONTH', 'Value_SP500_DIV_YIELD_MONTH',
'Value_SP500_PE_RATIO_MONTH', 'Value_SHILLER_PE_RATIO_MONTH',
'Value_SP500_EARNINGS_YIELD_MONTH', 'Value_SP500_INFLADJ_MONTH',
'Value_SP500_PSR_QUARTER', 'Value_SP500_DIV_MONTH',
'Value_SP500_DIV_YEAR', 'Value_SP500_DIV_GROWTH_YEAR',
'Value_SP500_DIV_GROWTH_QUARTER', 'Value_SP500_PBV_RATIO_QUARTER',
'Value_SHILLER_PE_RATIO_YEAR', 'Value_SP500_PE_RATIO_YEAR',
'Value_SP500_DIV_YIELD_YEAR', 'Value_SP500_PSR_YEAR',
'Value_SP500_EARNINGS_YIELD_YEAR', 'Value_SP500_PBV_RATIO_YEAR',
'Value_SP500_INFLADJ_YEAR', 'Value_SP500_SALES_YEAR',
'Value_SP500_SALES_GROWTH_YEAR', 'Value_SP500_SALES_QUARTER',
'Value_SP500_REAL_SALES_GROWTH_QUARTER',
'Value_SP500_SALES_GROWTH_QUARTER',
'Value_SP500_REAL_SALES_GROWTH_YEAR',
'Value_SP500_REAL_EARNINGS_GROWTH_YEAR', 'Value_SP500_REAL_SALES_YEAR',
'Value_SP500_REAL_EARNINGS_GROWTH_QUARTER',
'Value_SP500_EARNINGS_GROWTH_QUARTER', 'Value_SP500_REAL_SALES_QUARTER',
'Value_SP500_EARNINGS_MONTH', 'Value_SP500_BVPS_YEAR',
'Value_SP500_EARNINGS_YEAR', 'Value_SP500_EARNINGS_GROWTH_YEAR',
'Value_SP500_BVPS_QUARTER'],
dtype='object')
# Display the first rows of the fully merged frame to check the result of the
# joins (column names, Date index and where values are missing).
df.head()
| Value_SP500_REAL_PRICE_MONTH | Value_SP500_DIV_YIELD_MONTH | Value_SP500_PE_RATIO_MONTH | Value_SHILLER_PE_RATIO_MONTH | Value_SP500_EARNINGS_YIELD_MONTH | Value_SP500_INFLADJ_MONTH | Value_SP500_PSR_QUARTER | Value_SP500_DIV_MONTH | Value_SP500_DIV_YEAR | Value_SP500_DIV_GROWTH_YEAR | ... | Value_SP500_REAL_EARNINGS_GROWTH_YEAR | Value_SP500_REAL_SALES_YEAR | Value_SP500_REAL_EARNINGS_GROWTH_QUARTER | Value_SP500_EARNINGS_GROWTH_QUARTER | Value_SP500_REAL_SALES_QUARTER | Value_SP500_EARNINGS_MONTH | Value_SP500_BVPS_YEAR | Value_SP500_EARNINGS_YEAR | Value_SP500_EARNINGS_GROWTH_YEAR | Value_SP500_BVPS_QUARTER | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 1871-01-01 | 4.44 | NaN | 11.10 | NaN | 9.01 | 89.81 | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1871-01-31 | NaN | 5.86 | NaN | NaN | NaN | NaN | NaN | 5.26 | 5.15 | NaN | ... | NaN | NaN | NaN | NaN | NaN | 8.09 | NaN | 7.92 | NaN | NaN |
| 1871-02-01 | 4.50 | NaN | 11.25 | 10.92 | 8.89 | 88.33 | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1871-02-28 | NaN | 5.78 | NaN | NaN | NaN | NaN | NaN | 5.10 | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | 7.85 | NaN | NaN | NaN | NaN |
| 1871-03-01 | 4.61 | NaN | 11.52 | 11.19 | 8.68 | 89.17 | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 35 columns
#Pandas profiling of df
# Build a pandas-profiling report for the merged frame; as the last expression
# of the cell it is rendered inline in the notebook (per-variable statistics,
# missing-value counts and high-correlation warnings).
pandas_profiling.ProfileReport(df)
Dataset info
| Number of variables | 36 |
|---|---|
| Number of observations | 3559 |
| Total Missing (%) | 15.2% |
| Total size in memory | 1001.0 KiB |
| Average record size in memory | 288.0 B |
Variables types
| Numeric | 8 |
|---|---|
| Categorical | 0 |
| Boolean | 0 |
| Date | 1 |
| Text (Unique) | 0 |
| Rejected | 27 |
| Unsupported | 0 |
Warnings
Value_SHILLER_PE_RATIO_MONTH has 1787 / 50.2% missing values MissingValue_SHILLER_PE_RATIO_YEAR is highly correlated with Value_SP500_PBV_RATIO_QUARTER (ρ = 1) RejectedValue_SP500_BVPS_QUARTER is highly correlated with Value_SP500_BVPS_YEAR (ρ = 1) RejectedValue_SP500_BVPS_YEAR is highly correlated with Value_SP500_SALES_QUARTER (ρ = 0.9637) RejectedValue_SP500_DIV_GROWTH_QUARTER is highly correlated with Value_SP500_DIV_GROWTH_YEAR (ρ = 1) RejectedValue_SP500_DIV_GROWTH_YEAR has 3522 / 99.0% missing values MissingValue_SP500_DIV_MONTH has 1786 / 50.2% missing values MissingValue_SP500_DIV_YEAR is highly correlated with Value_SP500_DIV_MONTH (ρ = 0.99996) RejectedValue_SP500_DIV_YIELD_MONTH has 1782 / 50.1% missing values MissingValue_SP500_DIV_YIELD_YEAR is highly correlated with Value_SP500_EARNINGS_YIELD_MONTH (ρ = 1) RejectedValue_SP500_EARNINGS_GROWTH_QUARTER is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_QUARTER (ρ = 0.99905) RejectedValue_SP500_EARNINGS_GROWTH_YEAR is highly correlated with Value_SP500_EARNINGS_GROWTH_QUARTER (ρ = 1) RejectedValue_SP500_EARNINGS_MONTH is highly correlated with Value_SP500_DIV_YEAR (ρ = 0.94144) RejectedValue_SP500_EARNINGS_YEAR is highly correlated with Value_SP500_EARNINGS_MONTH (ρ = 0.99996) RejectedValue_SP500_EARNINGS_YIELD_MONTH is highly correlated with Value_SP500_DIV_YIELD_MONTH (ρ = 1) RejectedValue_SP500_EARNINGS_YIELD_YEAR is highly correlated with Value_SP500_DIV_YIELD_YEAR (ρ = 1) RejectedValue_SP500_INFLADJ_MONTH is highly correlated with Value_SP500_REAL_PRICE_MONTH (ρ = 0.96721) RejectedValue_SP500_INFLADJ_YEAR is highly correlated with Value_SP500_PBV_RATIO_YEAR (ρ = 1) RejectedValue_SP500_PBV_RATIO_QUARTER is highly correlated with Value_SP500_INFLADJ_MONTH (ρ = 1) RejectedValue_SP500_PBV_RATIO_YEAR is highly correlated with Value_SP500_PE_RATIO_YEAR (ρ = 1) RejectedValue_SP500_PE_RATIO_MONTH has 1786 / 50.2% missing values MissingValue_SP500_PE_RATIO_YEAR is highly correlated with 
Value_SP500_PBV_RATIO_QUARTER (ρ = 1) RejectedValue_SP500_PSR_QUARTER is highly correlated with Value_SP500_INFLADJ_MONTH (ρ = 1) RejectedValue_SP500_PSR_YEAR is highly correlated with Value_SP500_PE_RATIO_YEAR (ρ = 1) RejectedValue_SP500_REAL_EARNINGS_GROWTH_QUARTER is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_YEAR (ρ = 1) RejectedValue_SP500_REAL_EARNINGS_GROWTH_YEAR has 3522 / 99.0% missing values MissingValue_SP500_REAL_PRICE_MONTH has 1788 / 50.2% missing values MissingValue_SP500_REAL_SALES_GROWTH_QUARTER is highly correlated with Value_SP500_SALES_GROWTH_YEAR (ρ = 0.99159) RejectedValue_SP500_REAL_SALES_GROWTH_YEAR is highly correlated with Value_SP500_SALES_GROWTH_QUARTER (ρ = 0.99159) RejectedValue_SP500_REAL_SALES_QUARTER is highly correlated with Value_SP500_REAL_SALES_YEAR (ρ = 0.99714) RejectedValue_SP500_REAL_SALES_YEAR is highly correlated with Value_SP500_SALES_QUARTER (ρ = 0.9333) RejectedValue_SP500_SALES_GROWTH_QUARTER is highly correlated with Value_SP500_REAL_SALES_GROWTH_QUARTER (ρ = 0.99183) RejectedValue_SP500_SALES_GROWTH_YEAR has 3534 / 99.3% missing values MissingValue_SP500_SALES_QUARTER is highly correlated with Value_SP500_SALES_YEAR (ρ = 1) RejectedValue_SP500_SALES_YEAR is highly correlated with Value_SP500_DIV_YEAR (ρ = 0.92834) RejectedDate
Date
| Distinct count | 3559 |
|---|---|
| Unique (%) | 100.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Minimum | 1871-01-01 00:00:00 |
|---|---|
| Maximum | 2018-12-31 00:00:00 |
Value_SHILLER_PE_RATIO_MONTH
Numeric
| Distinct count | 1189 |
|---|---|
| Unique (%) | 33.4% |
| Missing (%) | 50.2% |
| Missing (n) | 1787 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 16.57 |
|---|---|
| Minimum | 4.78 |
| Maximum | 44.19 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 4.78 |
|---|---|
| 5-th percentile | 7.982 |
| Q1 | 11.627 |
| Median | 15.67 |
| Q3 | 20.15 |
| 95-th percentile | 27.901 |
| Maximum | 44.19 |
| Range | 39.41 |
| Interquartile range | 8.5225 |
Descriptive statistics
| Standard deviation | 6.6713 |
|---|---|
| Coef of variation | 0.40262 |
| Kurtosis | 1.99 |
| Mean | 16.57 |
| MAD | 5.1052 |
| Skewness | 1.1235 |
| Sum | 29362 |
| Variance | 44.506 |
| Memory size | 27.9 KiB |
| Value | Count | Frequency (%) | |
| 11.34 | 6 | 0.2% |
|
| 17.82 | 6 | 0.2% |
|
| 16.6 | 5 | 0.1% |
|
| 12.05 | 5 | 0.1% |
|
| 13.8 | 5 | 0.1% |
|
| 17.05 | 4 | 0.1% |
|
| 18.2 | 4 | 0.1% |
|
| 15.27 | 4 | 0.1% |
|
| 16.16 | 4 | 0.1% |
|
| 15.47 | 4 | 0.1% |
|
| Other values (1178) | 1725 | 48.5% |
|
| (Missing) | 1787 | 50.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 4.78 | 1 | 0.0% |
|
| 5.02 | 1 | 0.0% |
|
| 5.04 | 1 | 0.0% |
|
| 5.08 | 1 | 0.0% |
|
| 5.12 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 43.22 | 1 | 0.0% |
|
| 43.53 | 1 | 0.0% |
|
| 43.77 | 1 | 0.0% |
|
| 43.83 | 1 | 0.0% |
|
| 44.19 | 1 | 0.0% |
|
Value_SHILLER_PE_RATIO_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_PBV_RATIO_QUARTER and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_BVPS_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_BVPS_YEAR and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_BVPS_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_SALES_QUARTER and should be ignored for analysis
| Correlation | 0.9637 |
|---|
Value_SP500_DIV_GROWTH_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_DIV_GROWTH_YEAR and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_DIV_GROWTH_YEAR
Numeric
| Distinct count | 34 |
|---|---|
| Unique (%) | 1.0% |
| Missing (%) | 99.0% |
| Missing (n) | 3522 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 6.5478 |
|---|---|
| Minimum | -21.07 |
| Maximum | 18.25 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -21.07 |
|---|---|
| 5-th percentile | -2.668 |
| Q1 | 3.07 |
| Median | 7.07 |
| Q3 | 11.45 |
| 95-th percentile | 16.26 |
| Maximum | 18.25 |
| Range | 39.32 |
| Interquartile range | 8.38 |
Descriptive statistics
| Standard deviation | 6.9429 |
|---|---|
| Coef of variation | 1.0603 |
| Kurtosis | 5.896 |
| Mean | 6.5478 |
| MAD | 4.8668 |
| Skewness | -1.6172 |
| Sum | 242.27 |
| Variance | 48.204 |
| Memory size | 27.9 KiB |
| Value | Count | Frequency (%) | |
| 7.99 | 2 | 0.1% |
|
| 5.33 | 2 | 0.1% |
|
| 7.07 | 2 | 0.1% |
|
| 16.26 | 2 | 0.1% |
|
| 9.33 | 1 | 0.0% |
|
| 1.49 | 1 | 0.0% |
|
| 8.16 | 1 | 0.0% |
|
| 12.01 | 1 | 0.0% |
|
| 10.0 | 1 | 0.0% |
|
| 11.99 | 1 | 0.0% |
|
| Other values (23) | 23 | 0.6% |
|
| (Missing) | 3522 | 99.0% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -21.07 | 1 | 0.0% |
|
| -3.26 | 1 | 0.0% |
|
| -2.52 | 1 | 0.0% |
|
| 0.97 | 1 | 0.0% |
|
| 1.45 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 12.72 | 1 | 0.0% |
|
| 13.38 | 1 | 0.0% |
|
| 14.27 | 1 | 0.0% |
|
| 16.26 | 2 | 0.1% |
|
| 18.25 | 1 | 0.0% |
|
Value_SP500_DIV_MONTH
Numeric
| Distinct count | 1119 |
|---|---|
| Unique (%) | 31.4% |
| Missing (%) | 50.2% |
| Missing (n) | 1786 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 15.415 |
|---|---|
| Minimum | 5.03 |
| Maximum | 52.26 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 5.03 |
|---|---|
| 5-th percentile | 6.7 |
| Q1 | 8.61 |
| Median | 12.88 |
| Q3 | 19.6 |
| 95-th percentile | 32.694 |
| Maximum | 52.26 |
| Range | 47.23 |
| Interquartile range | 10.99 |
Descriptive statistics
| Standard deviation | 8.7887 |
|---|---|
| Coef of variation | 0.57013 |
| Kurtosis | 3.0931 |
| Mean | 15.415 |
| MAD | 6.7561 |
| Skewness | 1.5803 |
| Sum | 27331 |
| Variance | 77.241 |
| Memory size | 27.9 KiB |
| Value | Count | Frequency (%) | |
| 7.19 | 9 | 0.3% |
|
| 7.29 | 8 | 0.2% |
|
| 8.09 | 7 | 0.2% |
|
| 6.81 | 7 | 0.2% |
|
| 7.11 | 6 | 0.2% |
|
| 8.4 | 6 | 0.2% |
|
| 8.95 | 6 | 0.2% |
|
| 7.22 | 6 | 0.2% |
|
| 9.11 | 5 | 0.1% |
|
| 7.71 | 5 | 0.1% |
|
| Other values (1108) | 1708 | 48.0% |
|
| (Missing) | 1786 | 50.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 5.03 | 1 | 0.0% |
|
| 5.04 | 1 | 0.0% |
|
| 5.1 | 1 | 0.0% |
|
| 5.17 | 2 | 0.1% |
|
| 5.18 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 50.75 | 1 | 0.0% |
|
| 51.0 | 1 | 0.0% |
|
| 51.45 | 1 | 0.0% |
|
| 51.87 | 1 | 0.0% |
|
| 52.26 | 1 | 0.0% |
|
Value_SP500_DIV_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_DIV_MONTH and should be ignored for analysis
| Correlation | 0.99996 |
|---|
Value_SP500_DIV_YIELD_MONTH
Numeric
| Distinct count | 608 |
|---|---|
| Unique (%) | 17.1% |
| Missing (%) | 50.1% |
| Missing (n) | 1782 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 4.3421 |
|---|---|
| Minimum | 1.11 |
| Maximum | 13.84 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1.11 |
|---|---|
| 5-th percentile | 1.74 |
| Q1 | 3.15 |
| Median | 4.28 |
| Q3 | 5.39 |
| 95-th percentile | 7.18 |
| Maximum | 13.84 |
| Range | 12.73 |
| Interquartile range | 2.24 |
Descriptive statistics
| Standard deviation | 1.7025 |
|---|---|
| Coef of variation | 0.39208 |
| Kurtosis | 0.83756 |
| Mean | 4.3421 |
| MAD | 1.3453 |
| Skewness | 0.48864 |
| Sum | 7715.9 |
| Variance | 2.8984 |
| Memory size | 27.9 KiB |
| Value | Count | Frequency (%) | |
| 4.55 | 10 | 0.3% |
|
| 4.22 | 10 | 0.3% |
|
| 5.18 | 10 | 0.3% |
|
| 5.22 | 10 | 0.3% |
|
| 4.43 | 9 | 0.3% |
|
| 3.53 | 9 | 0.3% |
|
| 1.76 | 9 | 0.3% |
|
| 3.87 | 8 | 0.2% |
|
| 4.69 | 8 | 0.2% |
|
| 2.9 | 8 | 0.2% |
|
| Other values (597) | 1686 | 47.4% |
|
| (Missing) | 1782 | 50.1% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.11 | 2 | 0.1% |
|
| 1.13 | 1 | 0.0% |
|
| 1.14 | 1 | 0.0% |
|
| 1.15 | 1 | 0.0% |
|
| 1.16 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 10.15 | 1 | 0.0% |
|
| 11.36 | 1 | 0.0% |
|
| 12.46 | 1 | 0.0% |
|
| 12.64 | 1 | 0.0% |
|
| 13.84 | 1 | 0.0% |
|
Value_SP500_DIV_YIELD_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_EARNINGS_YIELD_MONTH and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_EARNINGS_GROWTH_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_QUARTER and should be ignored for analysis
| Correlation | 0.99905 |
|---|
Value_SP500_EARNINGS_GROWTH_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_EARNINGS_GROWTH_QUARTER and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_EARNINGS_MONTH
Highly correlated
This variable is highly correlated with Value_SP500_DIV_YEAR and should be ignored for analysis
| Correlation | 0.94144 |
|---|
Value_SP500_EARNINGS_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_EARNINGS_MONTH and should be ignored for analysis
| Correlation | 0.99996 |
|---|
Value_SP500_EARNINGS_YIELD_MONTH
Highly correlated
This variable is highly correlated with Value_SP500_DIV_YIELD_MONTH and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_EARNINGS_YIELD_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_DIV_YIELD_YEAR and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_INFLADJ_MONTH
Highly correlated
This variable is highly correlated with Value_SP500_REAL_PRICE_MONTH and should be ignored for analysis
| Correlation | 0.96721 |
|---|
Value_SP500_INFLADJ_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_PBV_RATIO_YEAR and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_PBV_RATIO_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_INFLADJ_MONTH and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_PBV_RATIO_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_PE_RATIO_YEAR and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_PE_RATIO_MONTH
Numeric
| Distinct count | 1117 |
|---|---|
| Unique (%) | 31.4% |
| Missing (%) | 50.2% |
| Missing (n) | 1786 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 15.725 |
|---|---|
| Minimum | 5.31 |
| Maximum | 123.73 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 5.31 |
|---|---|
| 5-th percentile | 7.83 |
| Q1 | 11.45 |
| Median | 14.73 |
| Q3 | 18.04 |
| 95-th percentile | 25.798 |
| Maximum | 123.73 |
| Range | 118.42 |
| Interquartile range | 6.59 |
Descriptive statistics
| Standard deviation | 8.4055 |
|---|---|
| Coef of variation | 0.53453 |
| Kurtosis | 67.915 |
| Mean | 15.725 |
| MAD | 4.6369 |
| Skewness | 6.4631 |
| Sum | 27880 |
| Variance | 70.653 |
| Memory size | 27.9 KiB |
| Value | Count | Frequency (%) | |
| 15.61 | 6 | 0.2% |
|
| 19.0 | 5 | 0.1% |
|
| 13.82 | 5 | 0.1% |
|
| 9.84 | 5 | 0.1% |
|
| 7.97 | 5 | 0.1% |
|
| 11.48 | 5 | 0.1% |
|
| 12.21 | 5 | 0.1% |
|
| 17.48 | 5 | 0.1% |
|
| 17.83 | 5 | 0.1% |
|
| 12.56 | 4 | 0.1% |
|
| Other values (1106) | 1723 | 48.4% |
|
| (Missing) | 1786 | 50.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 5.31 | 1 | 0.0% |
|
| 5.41 | 1 | 0.0% |
|
| 5.74 | 1 | 0.0% |
|
| 5.81 | 1 | 0.0% |
|
| 5.82 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 101.87 | 1 | 0.0% |
|
| 110.37 | 1 | 0.0% |
|
| 119.85 | 1 | 0.0% |
|
| 123.32 | 1 | 0.0% |
|
| 123.73 | 1 | 0.0% |
|
Value_SP500_PE_RATIO_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_PBV_RATIO_QUARTER and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_PSR_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_INFLADJ_MONTH and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_PSR_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_PE_RATIO_YEAR and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_REAL_EARNINGS_GROWTH_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_YEAR and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_REAL_EARNINGS_GROWTH_YEAR
Numeric
| Distinct count | 36 |
|---|---|
| Unique (%) | 1.0% |
| Missing (%) | 99.0% |
| Missing (n) | 3522 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 12.684 |
|---|---|
| Minimum | -79.48 |
| Maximum | 261.66 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -79.48 |
|---|---|
| 5-th percentile | -32.784 |
| Q1 | 0.3 |
| Median | 10.69 |
| Q3 | 14.93 |
| 95-th percentile | 54.266 |
| Maximum | 261.66 |
| Range | 341.14 |
| Interquartile range | 14.63 |
Descriptive statistics
| Standard deviation | 48.913 |
|---|---|
| Coef of variation | 3.8564 |
| Kurtosis | 19.396 |
| Mean | 12.684 |
| MAD | 21.87 |
| Skewness | 3.637 |
| Sum | 469.29 |
| Variance | 2392.5 |
| Memory size | 27.9 KiB |
| Value | Count | Frequency (%) | |
| 8.86 | 2 | 0.1% |
|
| 7.81 | 2 | 0.1% |
|
| -6.49 | 1 | 0.0% |
|
| 15.9 | 1 | 0.0% |
|
| -11.6 | 1 | 0.0% |
|
| -79.48 | 1 | 0.0% |
|
| 10.69 | 1 | 0.0% |
|
| 11.38 | 1 | 0.0% |
|
| 0.36 | 1 | 0.0% |
|
| -51.84 | 1 | 0.0% |
|
| Other values (25) | 25 | 0.7% |
|
| (Missing) | 3522 | 99.0% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -79.48 | 1 | 0.0% |
|
| -51.84 | 1 | 0.0% |
|
| -28.02 | 1 | 0.0% |
|
| -21.05 | 1 | 0.0% |
|
| -15.56 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 24.85 | 1 | 0.0% |
|
| 36.07 | 1 | 0.0% |
|
| 49.72 | 1 | 0.0% |
|
| 72.45 | 1 | 0.0% |
|
| 261.66 | 1 | 0.0% |
|
Value_SP500_REAL_PRICE_MONTH
Numeric
| Distinct count | 1401 |
|---|---|
| Unique (%) | 39.4% |
| Missing (%) | 50.2% |
| Missing (n) | 1788 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 262.57 |
|---|---|
| Minimum | 2.73 |
| Maximum | 2789.8 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 2.73 |
|---|---|
| 5-th percentile | 4.4 |
| Q1 | 7.745 |
| Median | 16.5 |
| Q3 | 123.65 |
| 95-th percentile | 1420.4 |
| Maximum | 2789.8 |
| Range | 2787.1 |
| Interquartile range | 115.91 |
Descriptive statistics
| Standard deviation | 523.67 |
|---|---|
| Coef of variation | 1.9944 |
| Kurtosis | 5.389 |
| Mean | 262.57 |
| MAD | 359.68 |
| Skewness | 2.4198 |
| Sum | 465010 |
| Variance | 274230 |
| Memory size | 27.9 KiB |
| Value | Count | Frequency (%) | |
| 4.37 | 7 | 0.2% |
|
| 4.46 | 7 | 0.2% |
|
| 5.3 | 6 | 0.2% |
|
| 7.68 | 6 | 0.2% |
|
| 5.18 | 6 | 0.2% |
|
| 5.32 | 6 | 0.2% |
|
| 4.34 | 5 | 0.1% |
|
| 4.38 | 5 | 0.1% |
|
| 8.12 | 5 | 0.1% |
|
| 4.65 | 5 | 0.1% |
|
| Other values (1390) | 1713 | 48.1% |
|
| (Missing) | 1788 | 50.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 2.73 | 1 | 0.0% |
|
| 2.85 | 1 | 0.0% |
|
| 2.94 | 2 | 0.1% |
|
| 3.05 | 1 | 0.0% |
|
| 3.17 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2702.77 | 1 | 0.0% |
|
| 2705.16 | 1 | 0.0% |
|
| 2736.61 | 1 | 0.0% |
|
| 2754.35 | 1 | 0.0% |
|
| 2789.8 | 1 | 0.0% |
|
Value_SP500_REAL_SALES_GROWTH_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_SALES_GROWTH_YEAR and should be ignored for analysis
| Correlation | 0.99159 |
|---|
Value_SP500_REAL_SALES_GROWTH_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_SALES_GROWTH_QUARTER and should be ignored for analysis
| Correlation | 0.99159 |
|---|
Value_SP500_REAL_SALES_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_REAL_SALES_YEAR and should be ignored for analysis
| Correlation | 0.99714 |
|---|
Value_SP500_REAL_SALES_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_SALES_QUARTER and should be ignored for analysis
| Correlation | 0.9333 |
|---|
Value_SP500_SALES_GROWTH_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_REAL_SALES_GROWTH_QUARTER and should be ignored for analysis
| Correlation | 0.99183 |
|---|
Value_SP500_SALES_GROWTH_YEAR
Numeric
| Distinct count | 22 |
|---|---|
| Unique (%) | 0.6% |
| Missing (%) | 99.3% |
| Missing (n) | 3534 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 4.2676 |
|---|---|
| Minimum | -12.86 |
| Maximum | 10.93 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -12.86 |
|---|---|
| 5-th percentile | -7.382 |
| Q1 | 2.09 |
| Median | 5.68 |
| Q3 | 7.68 |
| 95-th percentile | 10.576 |
| Maximum | 10.93 |
| Range | 23.79 |
| Interquartile range | 5.59 |
Descriptive statistics
| Standard deviation | 5.775 |
|---|---|
| Coef of variation | 1.3532 |
| Kurtosis | 2.4996 |
| Mean | 4.2676 |
| MAD | 4.2259 |
| Skewness | -1.5198 |
| Sum | 106.69 |
| Variance | 33.351 |
| Memory size | 27.9 KiB |
| Value | Count | Frequency (%) | |
| 2.09 | 2 | 0.1% |
|
| 9.36 | 2 | 0.1% |
|
| 7.68 | 2 | 0.1% |
|
| 7.03 | 2 | 0.1% |
|
| 1.7 | 1 | 0.0% |
|
| 5.68 | 1 | 0.0% |
|
| 5.37 | 1 | 0.0% |
|
| 10.93 | 1 | 0.0% |
|
| 4.16 | 1 | 0.0% |
|
| -1.18 | 1 | 0.0% |
|
| Other values (11) | 11 | 0.3% |
|
| (Missing) | 3534 | 99.3% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -12.86 | 1 | 0.0% |
|
| -8.45 | 1 | 0.0% |
|
| -3.11 | 1 | 0.0% |
|
| -1.18 | 1 | 0.0% |
|
| 1.7 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 8.94 | 1 | 0.0% |
|
| 9.03 | 1 | 0.0% |
|
| 9.36 | 2 | 0.1% |
|
| 10.88 | 1 | 0.0% |
|
| 10.93 | 1 | 0.0% |
|
Value_SP500_SALES_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_SALES_YEAR and should be ignored for analysis
| Correlation | 1 |
|---|
Value_SP500_SALES_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_DIV_YEAR and should be ignored for analysis
| Correlation | 0.92834 |
|---|
| Value_SP500_REAL_PRICE_MONTH | Value_SP500_DIV_YIELD_MONTH | Value_SP500_PE_RATIO_MONTH | Value_SHILLER_PE_RATIO_MONTH | Value_SP500_EARNINGS_YIELD_MONTH | Value_SP500_INFLADJ_MONTH | Value_SP500_PSR_QUARTER | Value_SP500_DIV_MONTH | Value_SP500_DIV_YEAR | Value_SP500_DIV_GROWTH_YEAR | Value_SP500_DIV_GROWTH_QUARTER | Value_SP500_PBV_RATIO_QUARTER | Value_SHILLER_PE_RATIO_YEAR | Value_SP500_PE_RATIO_YEAR | Value_SP500_DIV_YIELD_YEAR | Value_SP500_PSR_YEAR | Value_SP500_EARNINGS_YIELD_YEAR | Value_SP500_PBV_RATIO_YEAR | Value_SP500_INFLADJ_YEAR | Value_SP500_SALES_YEAR | Value_SP500_SALES_GROWTH_YEAR | Value_SP500_SALES_QUARTER | Value_SP500_REAL_SALES_GROWTH_QUARTER | Value_SP500_SALES_GROWTH_QUARTER | Value_SP500_REAL_SALES_GROWTH_YEAR | Value_SP500_REAL_EARNINGS_GROWTH_YEAR | Value_SP500_REAL_SALES_YEAR | Value_SP500_REAL_EARNINGS_GROWTH_QUARTER | Value_SP500_EARNINGS_GROWTH_QUARTER | Value_SP500_REAL_SALES_QUARTER | Value_SP500_EARNINGS_MONTH | Value_SP500_BVPS_YEAR | Value_SP500_EARNINGS_YEAR | Value_SP500_EARNINGS_GROWTH_YEAR | Value_SP500_BVPS_QUARTER | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||||||||||||||||
| 1871-01-01 | 4.44 | NaN | 11.10 | NaN | 9.01 | 89.81 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 11.1 | NaN | NaN | 9.01 | NaN | 89.81 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1871-01-31 | NaN | 5.86 | NaN | NaN | NaN | NaN | NaN | 5.26 | 5.15 | NaN | NaN | NaN | NaN | NaN | 5.86 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 8.09 | NaN | 7.92 | NaN | NaN |
| 1871-02-01 | 4.50 | NaN | 11.25 | 10.92 | 8.89 | 88.33 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1871-02-28 | NaN | 5.78 | NaN | NaN | NaN | NaN | NaN | 5.10 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 7.85 | NaN | NaN | NaN | NaN |
| 1871-03-01 | 4.61 | NaN | 11.52 | 11.19 | 8.68 | 89.17 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
def Column_missing_values(df):
    '''
    Plot the percentage of missing (NaN) values per column as a bar chart.

    Only columns containing at least one missing value are plotted, sorted in
    ascending order of their missing percentage. When the dataframe has no
    missing values at all, a message is printed instead of drawing a chart.

    IN - any pandas dataframe
    OUT - None (the function only plots or prints)
    '''
    # Number of missing values per column
    missing_counts = df.isnull().sum()
    # Guard clause: nothing to plot when every count is zero
    if not missing_counts.any():
        print('No missing values in provided dataframe')
        return
    # Keep only the affected columns and express the counts as a percentage of rows
    missing_columns = (missing_counts[missing_counts > 0] / df.shape[0]) * 100
    missing_columns = missing_columns.sort_values()
    missing_columns.plot.bar(title='Column wise percentage missing values', figsize=(8, 4))
# Bar chart of the percentage of missing values for each column of df that has any.
Column_missing_values(df)
# Heatmap of the pairwise correlations between the columns of df.
sns.heatmap(df.corr(),annot=None,fmt='.2f',square=False)
<matplotlib.axes._subplots.AxesSubplot at 0x1e5362196a0>
from pandas.plotting import autocorrelation_plot
# Autocorrelation plot of the data in df.
# NOTE(review): the whole dataframe (every column, NaNs included) is passed here,
# while pandas documents autocorrelation_plot for a single time series - confirm
# this is intended rather than one column such as Value_SP500_REAL_PRICE_MONTH.
autocorrelation_plot(df)
plt.show()
# Drop the last 11 rows of df; per the original author these rows have NaN in
# Value_SP500_REAL_PRICE_MONTH (no price values) and were treated as outliers.
# NOTE: the row count is hard-coded and the drop is done in place, so running
# this statement again removes a further 11 rows.
df.drop(df.tail(11).index,axis=0,inplace=True)
The original dataset has many missing (NaN) values, which add noise to the end results. General methods such as replacing them with the mean value cannot be applied, because doing so would add bias to the time-series dataset.
import impyute.imputation.cs
import impyute.imputation.ts
from sklearn.preprocessing import Imputer
from sklearn.impute import SimpleImputer, MissingIndicator
from statsmodels.tsa.arima_model import ARIMA
from sklearn.model_selection import cross_val_score
# Imputer that fills each NaN with the most frequent value of its column.
imputer_most_frequent = SimpleImputer(strategy='most_frequent', missing_values=np.nan)
# Build the imputed copy of df, carrying over the original column labels and index.
df_imputed = pd.DataFrame(
    imputer_most_frequent.fit_transform(df),
    index=df.index,
    columns=df.columns,
)
# Profile the imputed dataframe.
pandas_profiling.ProfileReport(df_imputed)
Dataset info
| Number of variables | 36 |
|---|---|
| Number of observations | 3548 |
| Total Missing (%) | 0.0% |
| Total size in memory | 998.0 KiB |
| Average record size in memory | 288.0 B |
Variables types
| Numeric | 25 |
|---|---|
| Categorical | 0 |
| Boolean | 0 |
| Date | 1 |
| Text (Unique) | 0 |
| Rejected | 10 |
| Unsupported | 0 |
Warnings
- Value_SP500_BVPS_QUARTER is highly correlated with Value_SP500_REAL_SALES_QUARTER (ρ = 0.951) Rejected
- Value_SP500_BVPS_YEAR is highly correlated with Value_SP500_REAL_SALES_YEAR (ρ = 0.9145) Rejected
- Value_SP500_EARNINGS_GROWTH_QUARTER is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_QUARTER (ρ = 0.99834) Rejected
- Value_SP500_EARNINGS_GROWTH_YEAR is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_YEAR (ρ = 0.9994) Rejected
- Value_SP500_EARNINGS_MONTH is highly correlated with Value_SP500_DIV_MONTH (ρ = 0.94595) Rejected
- Value_SP500_EARNINGS_YEAR is highly correlated with Value_SP500_DIV_YEAR (ρ = 0.94718) Rejected
- Value_SP500_INFLADJ_MONTH is highly correlated with Value_SP500_REAL_PRICE_MONTH (ρ = 0.95138) Rejected
- Value_SP500_PBV_RATIO_YEAR is highly skewed (γ1 = 23.544) Skewed
- Value_SP500_PSR_YEAR is highly skewed (γ1 = 20.076) Skewed
- Value_SP500_REAL_EARNINGS_GROWTH_QUARTER is highly skewed (γ1 = 32.842) Skewed
- Value_SP500_REAL_SALES_GROWTH_YEAR is highly correlated with Value_SP500_SALES_GROWTH_YEAR (ρ = 0.99905) Rejected
- Value_SP500_REAL_SALES_QUARTER is highly correlated with Value_SP500_SALES_QUARTER (ρ = 0.98094) Rejected
- Value_SP500_REAL_SALES_YEAR is highly correlated with Value_SP500_REAL_SALES_GROWTH_YEAR (ρ = 0.91447) Rejected

Date
Date
| Distinct count | 3548 |
|---|---|
| Unique (%) | 100.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Minimum | 1871-01-01 00:00:00 |
|---|---|
| Maximum | 2018-12-31 00:00:00 |
Value_SHILLER_PE_RATIO_MONTH
Numeric
| Distinct count | 1188 |
|---|---|
| Unique (%) | 33.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 13.952 |
|---|---|
| Minimum | 4.78 |
| Maximum | 44.19 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 4.78 |
|---|---|
| 5-th percentile | 9.26 |
| Q1 | 11.34 |
| Median | 11.34 |
| Q3 | 15.66 |
| 95-th percentile | 25.41 |
| Maximum | 44.19 |
| Range | 39.41 |
| Interquartile range | 4.32 |
Descriptive statistics
| Standard deviation | 5.3909 |
|---|---|
| Coef of variation | 0.38639 |
| Kurtosis | 6.0424 |
| Mean | 13.952 |
| MAD | 3.9298 |
| Skewness | 2.1774 |
| Sum | 49501 |
| Variance | 29.061 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 11.34 | 1782 | 50.2% |
|
| 17.82 | 6 | 0.2% |
|
| 13.8 | 5 | 0.1% |
|
| 16.6 | 5 | 0.1% |
|
| 12.05 | 5 | 0.1% |
|
| 16.83 | 4 | 0.1% |
|
| 10.0 | 4 | 0.1% |
|
| 10.91 | 4 | 0.1% |
|
| 15.47 | 4 | 0.1% |
|
| 18.96 | 4 | 0.1% |
|
| Other values (1178) | 1725 | 48.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 4.78 | 1 | 0.0% |
|
| 5.02 | 1 | 0.0% |
|
| 5.04 | 1 | 0.0% |
|
| 5.08 | 1 | 0.0% |
|
| 5.12 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 43.22 | 1 | 0.0% |
|
| 43.53 | 1 | 0.0% |
|
| 43.77 | 1 | 0.0% |
|
| 43.83 | 1 | 0.0% |
|
| 44.19 | 1 | 0.0% |
|
Value_SHILLER_PE_RATIO_YEAR
Numeric
| Distinct count | 141 |
|---|---|
| Unique (%) | 4.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 12.108 |
|---|---|
| Minimum | 5.12 |
| Maximum | 43.77 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 5.12 |
|---|---|
| 5-th percentile | 11.9 |
| Q1 | 11.9 |
| Median | 11.9 |
| Q3 | 11.9 |
| 95-th percentile | 11.9 |
| Maximum | 43.77 |
| Range | 38.65 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 1.7274 |
|---|---|
| Coef of variation | 0.14267 |
| Kurtosis | 112.09 |
| Mean | 12.108 |
| MAD | 0.45349 |
| Skewness | 9.3367 |
| Sum | 42958 |
| Variance | 2.9838 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 11.9 | 3402 | 95.9% |
|
| 17.22 | 2 | 0.1% |
|
| 9.26 | 2 | 0.1% |
|
| 17.09 | 2 | 0.1% |
|
| 18.47 | 2 | 0.1% |
|
| 20.32 | 2 | 0.1% |
|
| 13.9 | 2 | 0.1% |
|
| 27.21 | 1 | 0.0% |
|
| 22.9 | 1 | 0.0% |
|
| 20.97 | 1 | 0.0% |
|
| Other values (131) | 131 | 3.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 5.12 | 1 | 0.0% |
|
| 5.99 | 1 | 0.0% |
|
| 6.1 | 1 | 0.0% |
|
| 6.29 | 1 | 0.0% |
|
| 6.64 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 33.31 | 1 | 0.0% |
|
| 33.36 | 1 | 0.0% |
|
| 36.98 | 1 | 0.0% |
|
| 40.57 | 1 | 0.0% |
|
| 43.77 | 1 | 0.0% |
|
Value_SP500_BVPS_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_REAL_SALES_QUARTER and should be ignored for analysis
| Correlation | 0.951 |
|---|
Value_SP500_BVPS_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_REAL_SALES_YEAR and should be ignored for analysis
| Correlation | 0.9145 |
|---|
Value_SP500_DIV_GROWTH_QUARTER
Numeric
| Distinct count | 113 |
|---|---|
| Unique (%) | 3.2% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 11.696 |
|---|---|
| Minimum | -21.07 |
| Maximum | 18.25 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -21.07 |
|---|---|
| 5-th percentile | 11.89 |
| Q1 | 11.89 |
| Median | 11.89 |
| Q3 | 11.89 |
| 95-th percentile | 11.89 |
| Maximum | 18.25 |
| Range | 39.32 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 1.6985 |
|---|---|
| Coef of variation | 0.14523 |
| Kurtosis | 144.27 |
| Mean | 11.696 |
| MAD | 0.4133 |
| Skewness | -10.499 |
| Sum | 41496 |
| Variance | 2.8849 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 11.89 | 3434 | 96.8% |
|
| 12.65 | 2 | 0.1% |
|
| 12.49 | 2 | 0.1% |
|
| 10.46 | 1 | 0.0% |
|
| 18.25 | 1 | 0.0% |
|
| 5.05 | 1 | 0.0% |
|
| 1.55 | 1 | 0.0% |
|
| -4.71 | 1 | 0.0% |
|
| 1.26 | 1 | 0.0% |
|
| 8.02 | 1 | 0.0% |
|
| Other values (103) | 103 | 2.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -21.07 | 1 | 0.0% |
|
| -19.63 | 1 | 0.0% |
|
| -17.17 | 1 | 0.0% |
|
| -13.9 | 1 | 0.0% |
|
| -10.86 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 16.74 | 1 | 0.0% |
|
| 17.4 | 1 | 0.0% |
|
| 17.47 | 1 | 0.0% |
|
| 17.51 | 1 | 0.0% |
|
| 18.25 | 1 | 0.0% |
|
Value_SP500_DIV_GROWTH_YEAR
Numeric
| Distinct count | 32 |
|---|---|
| Unique (%) | 0.9% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | -20.824 |
|---|---|
| Minimum | -21.07 |
| Maximum | 18.25 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -21.07 |
|---|---|
| 5-th percentile | -21.07 |
| Q1 | -21.07 |
| Median | -21.07 |
| Q3 | -21.07 |
| 95-th percentile | -21.07 |
| Maximum | 18.25 |
| Range | 39.32 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 2.6645 |
|---|---|
| Coef of variation | -0.12795 |
| Kurtosis | 125.17 |
| Mean | -20.824 |
| MAD | 0.48728 |
| Skewness | 11.107 |
| Sum | -73884 |
| Variance | 7.0994 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| -21.07 | 3517 | 99.1% |
|
| 6.99 | 1 | 0.0% |
|
| 3.07 | 1 | 0.0% |
|
| 10.0 | 1 | 0.0% |
|
| 1.49 | 1 | 0.0% |
|
| 9.33 | 1 | 0.0% |
|
| 12.01 | 1 | 0.0% |
|
| 11.99 | 1 | 0.0% |
|
| 0.97 | 1 | 0.0% |
|
| 2.12 | 1 | 0.0% |
|
| Other values (22) | 22 | 0.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -21.07 | 3517 | 99.1% |
|
| -3.26 | 1 | 0.0% |
|
| -2.52 | 1 | 0.0% |
|
| 0.97 | 1 | 0.0% |
|
| 1.45 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 12.72 | 1 | 0.0% |
|
| 13.38 | 1 | 0.0% |
|
| 14.27 | 1 | 0.0% |
|
| 16.26 | 1 | 0.0% |
|
| 18.25 | 1 | 0.0% |
|
Value_SP500_DIV_MONTH
Numeric
| Distinct count | 1118 |
|---|---|
| Unique (%) | 31.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 11.3 |
|---|---|
| Minimum | 5.03 |
| Maximum | 52.26 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 5.03 |
|---|---|
| 5-th percentile | 7.11 |
| Q1 | 7.19 |
| Median | 7.19 |
| Q3 | 12.843 |
| 95-th percentile | 24.97 |
| Maximum | 52.26 |
| Range | 47.23 |
| Interquartile range | 5.6525 |
Descriptive statistics
| Standard deviation | 7.4502 |
|---|---|
| Coef of variation | 0.6593 |
| Kurtosis | 7.1196 |
| Mean | 11.3 |
| MAD | 5.3875 |
| Skewness | 2.448 |
| Sum | 40093 |
| Variance | 55.506 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 7.19 | 1784 | 50.3% |
|
| 7.29 | 8 | 0.2% |
|
| 6.81 | 7 | 0.2% |
|
| 8.09 | 7 | 0.2% |
|
| 8.4 | 6 | 0.2% |
|
| 8.95 | 6 | 0.2% |
|
| 7.22 | 6 | 0.2% |
|
| 7.11 | 6 | 0.2% |
|
| 6.94 | 5 | 0.1% |
|
| 7.05 | 5 | 0.1% |
|
| Other values (1108) | 1708 | 48.1% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 5.03 | 1 | 0.0% |
|
| 5.04 | 1 | 0.0% |
|
| 5.1 | 1 | 0.0% |
|
| 5.17 | 2 | 0.1% |
|
| 5.18 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 50.75 | 1 | 0.0% |
|
| 51.0 | 1 | 0.0% |
|
| 51.45 | 1 | 0.0% |
|
| 51.87 | 1 | 0.0% |
|
| 52.26 | 1 | 0.0% |
|
Value_SP500_DIV_YEAR
Numeric
| Distinct count | 146 |
|---|---|
| Unique (%) | 4.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 7.1088 |
|---|---|
| Minimum | 5.04 |
| Maximum | 52.26 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 5.04 |
|---|---|
| 5-th percentile | 6.7 |
| Q1 | 6.7 |
| Median | 6.7 |
| Q3 | 6.7 |
| 95-th percentile | 6.7 |
| Maximum | 52.26 |
| Range | 47.22 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 2.8765 |
|---|---|
| Coef of variation | 0.40463 |
| Kurtosis | 118.18 |
| Mean | 7.1088 |
| MAD | 0.79031 |
| Skewness | 9.9101 |
| Sum | 25222 |
| Variance | 8.2741 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 6.7 | 3398 | 95.8% |
|
| 7.11 | 2 | 0.1% |
|
| 6.81 | 2 | 0.1% |
|
| 7.61 | 2 | 0.1% |
|
| 15.89 | 2 | 0.1% |
|
| 12.17 | 2 | 0.1% |
|
| 24.21 | 1 | 0.0% |
|
| 9.18 | 1 | 0.0% |
|
| 8.07 | 1 | 0.0% |
|
| 16.97 | 1 | 0.0% |
|
| Other values (136) | 136 | 3.8% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 5.04 | 1 | 0.0% |
|
| 5.15 | 1 | 0.0% |
|
| 5.18 | 1 | 0.0% |
|
| 5.2 | 1 | 0.0% |
|
| 5.55 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 48.17 | 1 | 0.0% |
|
| 50.0 | 1 | 0.0% |
|
| 50.03 | 1 | 0.0% |
|
| 51.02 | 1 | 0.0% |
|
| 52.26 | 1 | 0.0% |
|
Value_SP500_DIV_YIELD_MONTH
Numeric
| Distinct count | 607 |
|---|---|
| Unique (%) | 17.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 4.2812 |
|---|---|
| Minimum | 1.11 |
| Maximum | 13.84 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1.11 |
|---|---|
| 5-th percentile | 1.96 |
| Q1 | 4.22 |
| Median | 4.22 |
| Q3 | 4.28 |
| 95-th percentile | 6.57 |
| Maximum | 13.84 |
| Range | 12.73 |
| Interquartile range | 0.06 |
Descriptive statistics
| Standard deviation | 1.2062 |
|---|---|
| Coef of variation | 0.28175 |
| Kurtosis | 4.7742 |
| Mean | 4.2812 |
| MAD | 0.70379 |
| Skewness | 0.83882 |
| Sum | 15190 |
| Variance | 1.455 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 4.22 | 1781 | 50.2% |
|
| 5.22 | 10 | 0.3% |
|
| 4.55 | 10 | 0.3% |
|
| 5.18 | 10 | 0.3% |
|
| 4.43 | 9 | 0.3% |
|
| 3.53 | 9 | 0.3% |
|
| 1.76 | 9 | 0.3% |
|
| 4.17 | 8 | 0.2% |
|
| 2.93 | 8 | 0.2% |
|
| 4.1 | 8 | 0.2% |
|
| Other values (597) | 1686 | 47.5% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.11 | 2 | 0.1% |
|
| 1.13 | 1 | 0.0% |
|
| 1.14 | 1 | 0.0% |
|
| 1.15 | 1 | 0.0% |
|
| 1.16 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 10.15 | 1 | 0.0% |
|
| 11.36 | 1 | 0.0% |
|
| 12.46 | 1 | 0.0% |
|
| 12.64 | 1 | 0.0% |
|
| 13.84 | 1 | 0.0% |
|
Value_SP500_DIV_YIELD_YEAR
Numeric
| Distinct count | 137 |
|---|---|
| Unique (%) | 3.9% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 4.0815 |
|---|---|
| Minimum | 1.17 |
| Maximum | 10.15 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1.17 |
|---|---|
| 5-th percentile | 4.07 |
| Q1 | 4.07 |
| Median | 4.07 |
| Q3 | 4.07 |
| 95-th percentile | 4.07 |
| Maximum | 10.15 |
| Range | 8.98 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.36379 |
|---|---|
| Coef of variation | 0.089132 |
| Kurtosis | 79.461 |
| Mean | 4.0815 |
| MAD | 0.069995 |
| Skewness | 4.2121 |
| Sum | 14481 |
| Variance | 0.13234 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 4.07 | 3400 | 95.8% |
|
| 6.2 | 2 | 0.1% |
|
| 5.41 | 2 | 0.1% |
|
| 4.4 | 2 | 0.1% |
|
| 3.49 | 2 | 0.1% |
|
| 3.53 | 2 | 0.1% |
|
| 3.81 | 2 | 0.1% |
|
| 1.76 | 2 | 0.1% |
|
| 1.61 | 2 | 0.1% |
|
| 5.71 | 2 | 0.1% |
|
| Other values (127) | 130 | 3.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.17 | 1 | 0.0% |
|
| 1.22 | 1 | 0.0% |
|
| 1.36 | 1 | 0.0% |
|
| 1.37 | 1 | 0.0% |
|
| 1.61 | 2 | 0.1% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 7.49 | 1 | 0.0% |
|
| 8.11 | 1 | 0.0% |
|
| 8.38 | 1 | 0.0% |
|
| 9.72 | 1 | 0.0% |
|
| 10.15 | 1 | 0.0% |
|
Value_SP500_EARNINGS_GROWTH_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_QUARTER and should be ignored for analysis
| Correlation | 0.99834 |
|---|
Value_SP500_EARNINGS_GROWTH_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_YEAR and should be ignored for analysis
| Correlation | 0.9994 |
|---|
Value_SP500_EARNINGS_MONTH
Highly correlated
This variable is highly correlated with Value_SP500_DIV_MONTH and should be ignored for analysis
| Correlation | 0.94595 |
|---|
Value_SP500_EARNINGS_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_DIV_YEAR and should be ignored for analysis
| Correlation | 0.94718 |
|---|
Value_SP500_EARNINGS_YIELD_MONTH
Numeric
| Distinct count | 792 |
|---|---|
| Unique (%) | 22.3% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 6.4929 |
|---|---|
| Minimum | 0.81 |
| Maximum | 18.82 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 0.81 |
|---|---|
| 5-th percentile | 4.45 |
| Q1 | 5.62 |
| Median | 5.62 |
| Q3 | 6.79 |
| 95-th percentile | 10.95 |
| Maximum | 18.82 |
| Range | 18.01 |
| Interquartile range | 1.17 |
Descriptive statistics
| Standard deviation | 2.1019 |
|---|---|
| Coef of variation | 0.32372 |
| Kurtosis | 5.3453 |
| Mean | 6.4929 |
| MAD | 1.4629 |
| Skewness | 2.0606 |
| Sum | 23037 |
| Variance | 4.418 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 5.62 | 1786 | 50.3% |
|
| 5.72 | 10 | 0.3% |
|
| 5.26 | 10 | 0.3% |
|
| 5.3 | 9 | 0.3% |
|
| 5.29 | 9 | 0.3% |
|
| 5.61 | 8 | 0.2% |
|
| 7.24 | 8 | 0.2% |
|
| 5.53 | 8 | 0.2% |
|
| 5.55 | 8 | 0.2% |
|
| 5.54 | 8 | 0.2% |
|
| Other values (782) | 1684 | 47.5% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.81 | 2 | 0.1% |
|
| 0.83 | 1 | 0.0% |
|
| 0.91 | 1 | 0.0% |
|
| 0.98 | 1 | 0.0% |
|
| 1.08 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 17.18 | 1 | 0.0% |
|
| 17.21 | 1 | 0.0% |
|
| 17.42 | 1 | 0.0% |
|
| 18.48 | 1 | 0.0% |
|
| 18.82 | 1 | 0.0% |
|
Value_SP500_EARNINGS_YIELD_YEAR
Numeric
| Distinct count | 137 |
|---|---|
| Unique (%) | 3.9% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 5.4126 |
|---|---|
| Minimum | 1.41 |
| Maximum | 17.42 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1.41 |
|---|---|
| 5-th percentile | 5.33 |
| Q1 | 5.33 |
| Median | 5.33 |
| Q3 | 5.33 |
| 95-th percentile | 5.33 |
| Maximum | 17.42 |
| Range | 16.01 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.67946 |
|---|---|
| Coef of variation | 0.12553 |
| Kurtosis | 105.43 |
| Mean | 5.4126 |
| MAD | 0.17808 |
| Skewness | 9.0727 |
| Sum | 19204 |
| Variance | 0.46167 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 5.33 | 3401 | 95.9% |
|
| 5.53 | 3 | 0.1% |
|
| 8.46 | 3 | 0.1% |
|
| 5.55 | 2 | 0.1% |
|
| 6.57 | 2 | 0.1% |
|
| 7.42 | 2 | 0.1% |
|
| 4.44 | 2 | 0.1% |
|
| 7.35 | 2 | 0.1% |
|
| 11.08 | 2 | 0.1% |
|
| 9.87 | 2 | 0.1% |
|
| Other values (127) | 127 | 3.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.41 | 1 | 0.0% |
|
| 2.17 | 1 | 0.0% |
|
| 3.04 | 1 | 0.0% |
|
| 3.18 | 1 | 0.0% |
|
| 3.44 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 13.53 | 1 | 0.0% |
|
| 13.84 | 1 | 0.0% |
|
| 15.1 | 1 | 0.0% |
|
| 15.77 | 1 | 0.0% |
|
| 17.42 | 1 | 0.0% |
|
Value_SP500_INFLADJ_MONTH
Highly correlated
This variable is highly correlated with Value_SP500_REAL_PRICE_MONTH and should be ignored for analysis
| Correlation | 0.95138 |
|---|
Value_SP500_INFLADJ_YEAR
Numeric
| Distinct count | 150 |
|---|---|
| Unique (%) | 4.2% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 101.53 |
|---|---|
| Minimum | 81.79 |
| Maximum | 2914.2 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 81.79 |
|---|---|
| 5-th percentile | 81.79 |
| Q1 | 81.79 |
| Median | 81.79 |
| Q3 | 81.79 |
| 95-th percentile | 81.79 |
| Maximum | 2914.2 |
| Range | 2832.4 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 157.9 |
|---|---|
| Coef of variation | 1.5552 |
| Kurtosis | 149.94 |
| Mean | 101.53 |
| MAD | 37.848 |
| Skewness | 11.449 |
| Sum | 360230 |
| Variance | 24932 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 81.79 | 3399 | 95.8% |
|
| 178.12 | 1 | 0.0% |
|
| 103.6 | 1 | 0.0% |
|
| 739.63 | 1 | 0.0% |
|
| 609.48 | 1 | 0.0% |
|
| 188.58 | 1 | 0.0% |
|
| 178.98 | 1 | 0.0% |
|
| 176.02 | 1 | 0.0% |
|
| 2041.06 | 1 | 0.0% |
|
| 161.59 | 1 | 0.0% |
|
| Other values (140) | 140 | 3.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 81.79 | 3399 | 95.8% |
|
| 88.75 | 1 | 0.0% |
|
| 89.81 | 1 | 0.0% |
|
| 94.32 | 1 | 0.0% |
|
| 94.95 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2187.26 | 1 | 0.0% |
|
| 2361.3 | 1 | 0.0% |
|
| 2758.77 | 1 | 0.0% |
|
| 2836.75 | 1 | 0.0% |
|
| 2914.22 | 1 | 0.0% |
|
Value_SP500_PBV_RATIO_QUARTER
Numeric
| Distinct count | 61 |
|---|---|
| Unique (%) | 1.7% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.5849 |
|---|---|
| Minimum | 1.78 |
| Maximum | 5.06 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1.78 |
|---|---|
| 5-th percentile | 2.58 |
| Q1 | 2.58 |
| Median | 2.58 |
| Q3 | 2.58 |
| 95-th percentile | 2.58 |
| Maximum | 5.06 |
| Range | 3.28 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.10134 |
|---|---|
| Coef of variation | 0.039203 |
| Kurtosis | 309.88 |
| Mean | 2.5849 |
| MAD | 0.015266 |
| Skewness | 14.136 |
| Sum | 9171.1 |
| Variance | 0.010269 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 2.58 | 3473 | 97.9% |
|
| 2.91 | 3 | 0.1% |
|
| 2.76 | 3 | 0.1% |
|
| 2.77 | 3 | 0.1% |
|
| 2.43 | 2 | 0.1% |
|
| 3.03 | 2 | 0.1% |
|
| 2.83 | 2 | 0.1% |
|
| 2.67 | 2 | 0.1% |
|
| 2.78 | 2 | 0.1% |
|
| 2.74 | 2 | 0.1% |
|
| Other values (51) | 54 | 1.5% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.78 | 1 | 0.0% |
|
| 1.85 | 1 | 0.0% |
|
| 1.89 | 1 | 0.0% |
|
| 1.9 | 1 | 0.0% |
|
| 2.0 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 4.05 | 1 | 0.0% |
|
| 4.49 | 1 | 0.0% |
|
| 4.65 | 1 | 0.0% |
|
| 5.05 | 1 | 0.0% |
|
| 5.06 | 1 | 0.0% |
|
Value_SP500_PBV_RATIO_YEAR
Numeric
| Distinct count | 19 |
|---|---|
| Unique (%) | 0.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 2.1744 |
|---|---|
| Minimum | 2 |
| Maximum | 5.05 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 2 |
|---|---|
| 5-th percentile | 2.17 |
| Q1 | 2.17 |
| Median | 2.17 |
| Q3 | 2.17 |
| 95-th percentile | 2.17 |
| Maximum | 5.05 |
| Range | 3.05 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.078208 |
|---|---|
| Coef of variation | 0.035968 |
| Kurtosis | 682.74 |
| Mean | 2.1744 |
| MAD | 0.0089232 |
| Skewness | 23.544 |
| Sum | 7714.7 |
| Variance | 0.0061165 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 2.17 | 3529 | 99.5% |
|
| 2.76 | 2 | 0.1% |
|
| 2.14 | 1 | 0.0% |
|
| 2.91 | 1 | 0.0% |
|
| 3.5 | 1 | 0.0% |
|
| 2.73 | 1 | 0.0% |
|
| 2.77 | 1 | 0.0% |
|
| 2.81 | 1 | 0.0% |
|
| 2.58 | 1 | 0.0% |
|
| 3.39 | 1 | 0.0% |
|
| Other values (9) | 9 | 0.3% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 2.0 | 1 | 0.0% |
|
| 2.05 | 1 | 0.0% |
|
| 2.14 | 1 | 0.0% |
|
| 2.17 | 3529 | 99.5% |
|
| 2.58 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 3.3 | 1 | 0.0% |
|
| 3.39 | 1 | 0.0% |
|
| 3.5 | 1 | 0.0% |
|
| 4.05 | 1 | 0.0% |
|
| 5.05 | 1 | 0.0% |
|
Value_SP500_PE_RATIO_MONTH
Numeric
| Distinct count | 1116 |
|---|---|
| Unique (%) | 31.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 15.667 |
|---|---|
| Minimum | 5.31 |
| Maximum | 123.73 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 5.31 |
|---|---|
| 5-th percentile | 9.13 |
| Q1 | 14.73 |
| Median | 15.61 |
| Q3 | 15.61 |
| 95-th percentile | 22.479 |
| Maximum | 123.73 |
| Range | 118.42 |
| Interquartile range | 0.88 |
Descriptive statistics
| Standard deviation | 5.9414 |
|---|---|
| Coef of variation | 0.37922 |
| Kurtosis | 139.05 |
| Mean | 15.667 |
| MAD | 2.3416 |
| Skewness | 9.1666 |
| Sum | 55588 |
| Variance | 35.3 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 15.61 | 1781 | 50.2% |
|
| 11.48 | 5 | 0.1% |
|
| 12.21 | 5 | 0.1% |
|
| 17.48 | 5 | 0.1% |
|
| 9.84 | 5 | 0.1% |
|
| 19.0 | 5 | 0.1% |
|
| 17.83 | 5 | 0.1% |
|
| 7.97 | 5 | 0.1% |
|
| 13.82 | 5 | 0.1% |
|
| 9.13 | 4 | 0.1% |
|
| Other values (1106) | 1723 | 48.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 5.31 | 1 | 0.0% |
|
| 5.41 | 1 | 0.0% |
|
| 5.74 | 1 | 0.0% |
|
| 5.81 | 1 | 0.0% |
|
| 5.82 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 101.87 | 1 | 0.0% |
|
| 110.37 | 1 | 0.0% |
|
| 119.85 | 1 | 0.0% |
|
| 123.32 | 1 | 0.0% |
|
| 123.73 | 1 | 0.0% |
|
Value_SP500_PE_RATIO_YEAR
Numeric
| Distinct count | 142 |
|---|---|
| Unique (%) | 4.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 11.989 |
|---|---|
| Minimum | 5.74 |
| Maximum | 70.91 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 5.74 |
|---|---|
| 5-th percentile | 11.82 |
| Q1 | 11.82 |
| Median | 11.82 |
| Q3 | 11.82 |
| 95-th percentile | 11.82 |
| Maximum | 70.91 |
| Range | 65.17 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 1.6921 |
|---|---|
| Coef of variation | 0.14114 |
| Kurtosis | 487.59 |
| Mean | 11.989 |
| MAD | 0.37918 |
| Skewness | 17.568 |
| Sum | 42537 |
| Variance | 2.8634 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 11.82 | 3401 | 95.9% |
|
| 13.48 | 2 | 0.1% |
|
| 18.01 | 2 | 0.1% |
|
| 7.97 | 2 | 0.1% |
|
| 9.02 | 2 | 0.1% |
|
| 10.13 | 2 | 0.1% |
|
| 18.77 | 2 | 0.1% |
|
| 17.81 | 1 | 0.0% |
|
| 19.33 | 1 | 0.0% |
|
| 19.99 | 1 | 0.0% |
|
| Other values (132) | 132 | 3.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 5.74 | 1 | 0.0% |
|
| 6.34 | 1 | 0.0% |
|
| 6.62 | 1 | 0.0% |
|
| 7.22 | 1 | 0.0% |
|
| 7.39 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 29.04 | 1 | 0.0% |
|
| 31.43 | 1 | 0.0% |
|
| 32.92 | 1 | 0.0% |
|
| 46.17 | 1 | 0.0% |
|
| 70.91 | 1 | 0.0% |
|
Value_SP500_PSR_QUARTER
Numeric
| Distinct count | 55 |
|---|---|
| Unique (%) | 1.6% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.432 |
|---|---|
| Minimum | 0.8 |
| Maximum | 2.31 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 0.8 |
|---|---|
| 5-th percentile | 1.43 |
| Q1 | 1.43 |
| Median | 1.43 |
| Q3 | 1.43 |
| 95-th percentile | 1.43 |
| Maximum | 2.31 |
| Range | 1.51 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.048445 |
|---|---|
| Coef of variation | 0.03383 |
| Kurtosis | 154.51 |
| Mean | 1.432 |
| MAD | 0.0071731 |
| Skewness | 8.0394 |
| Sum | 5080.8 |
| Variance | 0.002347 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 1.43 | 3479 | 98.1% |
|
| 1.44 | 3 | 0.1% |
|
| 1.54 | 3 | 0.1% |
|
| 1.31 | 3 | 0.1% |
|
| 1.19 | 2 | 0.1% |
|
| 1.77 | 2 | 0.1% |
|
| 1.52 | 2 | 0.1% |
|
| 1.27 | 2 | 0.1% |
|
| 2.1 | 2 | 0.1% |
|
| 1.66 | 2 | 0.1% |
|
| Other values (45) | 48 | 1.4% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.8 | 1 | 0.0% |
|
| 0.87 | 1 | 0.0% |
|
| 0.97 | 1 | 0.0% |
|
| 1.08 | 1 | 0.0% |
|
| 1.1 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2.1 | 2 | 0.1% |
|
| 2.13 | 1 | 0.0% |
|
| 2.17 | 1 | 0.0% |
|
| 2.25 | 1 | 0.0% |
|
| 2.31 | 1 | 0.0% |
|
Value_SP500_PSR_YEAR
Numeric
| Distinct count | 16 |
|---|---|
| Unique (%) | 0.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.3116 |
|---|---|
| Minimum | 0.87 |
| Maximum | 2.31 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 0.87 |
|---|---|
| 5-th percentile | 1.31 |
| Q1 | 1.31 |
| Median | 1.31 |
| Q3 | 1.31 |
| 95-th percentile | 1.31 |
| Maximum | 2.31 |
| Range | 1.44 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.033841 |
|---|---|
| Coef of variation | 0.025802 |
| Kurtosis | 506.53 |
| Mean | 1.3116 |
| MAD | 0.003505 |
| Skewness | 20.076 |
| Sum | 4653.5 |
| Variance | 0.0011452 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 1.31 | 3530 | 99.5% |
|
| 1.56 | 2 | 0.1% |
|
| 1.43 | 2 | 0.1% |
|
| 1.77 | 2 | 0.1% |
|
| 1.66 | 1 | 0.0% |
|
| 0.87 | 1 | 0.0% |
|
| 1.81 | 1 | 0.0% |
|
| 2.13 | 1 | 0.0% |
|
| 1.3 | 1 | 0.0% |
|
| 1.95 | 1 | 0.0% |
|
| Other values (6) | 6 | 0.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.87 | 1 | 0.0% |
|
| 1.19 | 1 | 0.0% |
|
| 1.23 | 1 | 0.0% |
|
| 1.3 | 1 | 0.0% |
|
| 1.31 | 3530 | 99.5% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1.81 | 1 | 0.0% |
|
| 1.95 | 1 | 0.0% |
|
| 2.13 | 1 | 0.0% |
|
| 2.17 | 1 | 0.0% |
|
| 2.31 | 1 | 0.0% |
|
Value_SP500_REAL_EARNINGS_GROWTH_QUARTER
Numeric
| Distinct count | 114 |
|---|---|
| Unique (%) | 3.2% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 11.791 |
|---|---|
| Minimum | -90.27 |
| Maximum | 905.56 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -90.27 |
|---|---|
| 5-th percentile | 11.38 |
| Q1 | 11.38 |
| Median | 11.38 |
| Q3 | 11.38 |
| 95-th percentile | 11.38 |
| Maximum | 905.56 |
| Range | 995.83 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 23.282 |
|---|---|
| Coef of variation | 1.9746 |
| Kurtosis | 1188.3 |
| Mean | 11.791 |
| MAD | 1.7031 |
| Skewness | 32.842 |
| Sum | 41833 |
| Variance | 542.04 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 11.38 | 3435 | 96.8% |
|
| 17.8 | 1 | 0.0% |
|
| -7.72 | 1 | 0.0% |
|
| -31.14 | 1 | 0.0% |
|
| 0.55 | 1 | 0.0% |
|
| -44.15 | 1 | 0.0% |
|
| 11.18 | 1 | 0.0% |
|
| -21.05 | 1 | 0.0% |
|
| -86.8 | 1 | 0.0% |
|
| 15.28 | 1 | 0.0% |
|
| Other values (104) | 104 | 2.9% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -90.27 | 1 | 0.0% |
|
| -86.8 | 1 | 0.0% |
|
| -79.48 | 1 | 0.0% |
|
| -73.86 | 1 | 0.0% |
|
| -51.84 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 72.45 | 1 | 0.0% |
|
| 261.66 | 1 | 0.0% |
|
| 492.53 | 1 | 0.0% |
|
| 870.4 | 1 | 0.0% |
|
| 905.56 | 1 | 0.0% |
|
Value_SP500_REAL_EARNINGS_GROWTH_YEAR
Numeric
| Distinct count | 33 |
|---|---|
| Unique (%) | 0.9% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | -78.62 |
|---|---|
| Minimum | -79.48 |
| Maximum | 261.66 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -79.48 |
|---|---|
| 5-th percentile | -79.48 |
| Q1 | -79.48 |
| Median | -79.48 |
| Q3 | -79.48 |
| 95-th percentile | -79.48 |
| Maximum | 261.66 |
| Range | 341.14 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 10.149 |
|---|---|
| Coef of variation | -0.12909 |
| Kurtosis | 419.42 |
| Mean | -78.62 |
| MAD | 1.7038 |
| Skewness | 17.108 |
| Sum | -278940 |
| Variance | 103.01 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| -79.48 | 3516 | 99.1% |
|
| 17.8 | 1 | 0.0% |
|
| 8.1 | 1 | 0.0% |
|
| 261.66 | 1 | 0.0% |
|
| -28.02 | 1 | 0.0% |
|
| 49.72 | 1 | 0.0% |
|
| 0.36 | 1 | 0.0% |
|
| -6.49 | 1 | 0.0% |
|
| 11.38 | 1 | 0.0% |
|
| 10.69 | 1 | 0.0% |
|
| Other values (23) | 23 | 0.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -79.48 | 3516 | 99.1% |
|
| -51.84 | 1 | 0.0% |
|
| -28.02 | 1 | 0.0% |
|
| -21.05 | 1 | 0.0% |
|
| -15.56 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 24.85 | 1 | 0.0% |
|
| 36.07 | 1 | 0.0% |
|
| 49.72 | 1 | 0.0% |
|
| 72.45 | 1 | 0.0% |
|
| 261.66 | 1 | 0.0% |
|
Value_SP500_REAL_PRICE_MONTH
Numeric
| Distinct count | 1400 |
|---|---|
| Unique (%) | 39.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 133.25 |
|---|---|
| Minimum | 2.73 |
| Maximum | 2789.8 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 2.73 |
|---|---|
| 5-th percentile | 4.37 |
| Q1 | 4.37 |
| Median | 4.37 |
| Q3 | 16.293 |
| 95-th percentile | 1149.6 |
| Maximum | 2789.8 |
| Range | 2787.1 |
| Interquartile range | 11.922 |
Descriptive statistics
| Standard deviation | 391.81 |
|---|---|
| Coef of variation | 2.9404 |
| Kurtosis | 14.745 |
| Mean | 133.25 |
| MAD | 208.57 |
| Skewness | 3.7646 |
| Sum | 472780 |
| Variance | 153520 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 4.37 | 1784 | 50.3% |
|
| 4.46 | 7 | 0.2% |
|
| 5.32 | 6 | 0.2% |
|
| 5.3 | 6 | 0.2% |
|
| 5.18 | 6 | 0.2% |
|
| 7.68 | 6 | 0.2% |
|
| 4.59 | 5 | 0.1% |
|
| 5.51 | 5 | 0.1% |
|
| 4.65 | 5 | 0.1% |
|
| 8.12 | 5 | 0.1% |
|
| Other values (1390) | 1713 | 48.3% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 2.73 | 1 | 0.0% |
|
| 2.85 | 1 | 0.0% |
|
| 2.94 | 2 | 0.1% |
|
| 3.05 | 1 | 0.0% |
|
| 3.17 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2702.77 | 1 | 0.0% |
|
| 2705.16 | 1 | 0.0% |
|
| 2736.61 | 1 | 0.0% |
|
| 2754.35 | 1 | 0.0% |
|
| 2789.8 | 1 | 0.0% |
|
Value_SP500_REAL_SALES_GROWTH_QUARTER
Numeric
| Distinct count | 66 |
|---|---|
| Unique (%) | 1.9% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | -1.7438 |
|---|---|
| Minimum | -15.81 |
| Maximum | 8.89 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -15.81 |
|---|---|
| 5-th percentile | -1.8 |
| Q1 | -1.8 |
| Median | -1.8 |
| Q3 | -1.8 |
| 95-th percentile | -1.8 |
| Maximum | 8.89 |
| Range | 24.7 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.86472 |
|---|---|
| Coef of variation | -0.49588 |
| Kurtosis | 96.112 |
| Mean | -1.7438 |
| MAD | 0.15904 |
| Skewness | 3.3383 |
| Sum | -6187 |
| Variance | 0.74774 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| -1.8 | 3483 | 98.2% |
|
| 7.95 | 1 | 0.0% |
|
| 7.72 | 1 | 0.0% |
|
| -6.85 | 1 | 0.0% |
|
| 5.72 | 1 | 0.0% |
|
| 7.23 | 1 | 0.0% |
|
| 1.13 | 1 | 0.0% |
|
| 2.88 | 1 | 0.0% |
|
| 4.39 | 1 | 0.0% |
|
| 5.09 | 1 | 0.0% |
|
| Other values (56) | 56 | 1.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -15.81 | 1 | 0.0% |
|
| -12.69 | 1 | 0.0% |
|
| -12.66 | 1 | 0.0% |
|
| -9.91 | 1 | 0.0% |
|
| -8.35 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 7.53 | 1 | 0.0% |
|
| 7.72 | 1 | 0.0% |
|
| 7.95 | 1 | 0.0% |
|
| 8.87 | 1 | 0.0% |
|
| 8.89 | 1 | 0.0% |
|
Value_SP500_REAL_SALES_GROWTH_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_SALES_GROWTH_YEAR and should be ignored for analysis
| Correlation | 0.99905 |
|---|
Value_SP500_REAL_SALES_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_SALES_QUARTER and should be ignored for analysis
| Correlation | 0.98094 |
|---|
Value_SP500_REAL_SALES_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_REAL_SALES_GROWTH_YEAR and should be ignored for analysis
| Correlation | 0.91447 |
|---|
Value_SP500_SALES_GROWTH_QUARTER
Numeric
| Distinct count | 67 |
|---|---|
| Unique (%) | 1.9% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | -16.087 |
|---|---|
| Minimum | -16.46 |
| Maximum | 12.56 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -16.46 |
|---|---|
| 5-th percentile | -16.46 |
| Q1 | -16.46 |
| Median | -16.46 |
| Q3 | -16.46 |
| 95-th percentile | -16.46 |
| Maximum | 12.56 |
| Range | 29.02 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 2.8312 |
|---|---|
| Coef of variation | -0.176 |
| Kurtosis | 61.813 |
| Mean | -16.087 |
| MAD | 0.73302 |
| Skewness | 7.8448 |
| Sum | -57075 |
| Variance | 8.0156 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| -16.46 | 3482 | 98.1% |
|
| -2.47 | 1 | 0.0% |
|
| 1.7 | 1 | 0.0% |
|
| 0.2 | 1 | 0.0% |
|
| -8.45 | 1 | 0.0% |
|
| 8.94 | 1 | 0.0% |
|
| 7.62 | 1 | 0.0% |
|
| 9.96 | 1 | 0.0% |
|
| 8.04 | 1 | 0.0% |
|
| 4.81 | 1 | 0.0% |
|
| Other values (57) | 57 | 1.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -16.46 | 3482 | 98.1% |
|
| -12.86 | 1 | 0.0% |
|
| -11.95 | 1 | 0.0% |
|
| -8.45 | 1 | 0.0% |
|
| -7.97 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 10.93 | 1 | 0.0% |
|
| 11.01 | 1 | 0.0% |
|
| 11.29 | 1 | 0.0% |
|
| 11.99 | 1 | 0.0% |
|
| 12.56 | 1 | 0.0% |
|
Value_SP500_SALES_GROWTH_YEAR
Numeric
| Distinct count | 21 |
|---|---|
| Unique (%) | 0.6% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | -12.761 |
|---|---|
| Minimum | -12.86 |
| Maximum | 10.93 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -12.86 |
|---|---|
| 5-th percentile | -12.86 |
| Q1 | -12.86 |
| Median | -12.86 |
| Q3 | -12.86 |
| 95-th percentile | -12.86 |
| Maximum | 10.93 |
| Range | 23.79 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 1.3605 |
|---|---|
| Coef of variation | -0.10661 |
| Kurtosis | 208.16 |
| Mean | -12.761 |
| MAD | 0.19651 |
| Skewness | 14.282 |
| Sum | -45277 |
| Variance | 1.8508 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| -12.86 | 3528 | 99.4% |
|
| 2.24 | 1 | 0.0% |
|
| 5.98 | 1 | 0.0% |
|
| 5.37 | 1 | 0.0% |
|
| 10.93 | 1 | 0.0% |
|
| 4.16 | 1 | 0.0% |
|
| -1.18 | 1 | 0.0% |
|
| 7.03 | 1 | 0.0% |
|
| 1.7 | 1 | 0.0% |
|
| 3.76 | 1 | 0.0% |
|
| Other values (11) | 11 | 0.3% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -12.86 | 3528 | 99.4% |
|
| -8.45 | 1 | 0.0% |
|
| -3.11 | 1 | 0.0% |
|
| -1.18 | 1 | 0.0% |
|
| 1.7 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 8.94 | 1 | 0.0% |
|
| 9.03 | 1 | 0.0% |
|
| 9.36 | 1 | 0.0% |
|
| 10.88 | 1 | 0.0% |
|
| 10.93 | 1 | 0.0% |
|
Value_SP500_SALES_QUARTER
Numeric
| Distinct count | 71 |
|---|---|
| Unique (%) | 2.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 680.53 |
|---|---|
| Minimum | 674.59 |
| Maximum | 1292.8 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 674.59 |
|---|---|
| 5-th percentile | 674.59 |
| Q1 | 674.59 |
| Median | 674.59 |
| Q3 | 674.59 |
| 95-th percentile | 674.59 |
| Maximum | 1292.8 |
| Range | 618.25 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 48.068 |
|---|---|
| Coef of variation | 0.070634 |
| Kurtosis | 79.358 |
| Mean | 680.53 |
| MAD | 11.646 |
| Skewness | 8.7691 |
| Sum | 2414500 |
| Variance | 2310.6 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 674.59 | 3478 | 98.0% |
|
| 1127.13 | 1 | 0.0% |
|
| 917.93 | 1 | 0.0% |
|
| 998.54 | 1 | 0.0% |
|
| 1185.81 | 1 | 0.0% |
|
| 828.1 | 1 | 0.0% |
|
| 1136.16 | 1 | 0.0% |
|
| 738.81 | 1 | 0.0% |
|
| 965.19 | 1 | 0.0% |
|
| 981.21 | 1 | 0.0% |
|
| Other values (61) | 61 | 1.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 674.59 | 3478 | 98.0% |
|
| 678.6 | 1 | 0.0% |
|
| 684.42 | 1 | 0.0% |
|
| 697.75 | 1 | 0.0% |
|
| 697.9 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1185.81 | 1 | 0.0% |
|
| 1203.1 | 1 | 0.0% |
|
| 1231.57 | 1 | 0.0% |
|
| 1259.18 | 1 | 0.0% |
|
| 1292.84 | 1 | 0.0% |
|
Value_SP500_SALES_YEAR
Numeric
| Distinct count | 22 |
|---|---|
| Unique (%) | 0.6% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 676.69 |
|---|---|
| Minimum | 674.59 |
| Maximum | 1292.8 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 674.59 |
|---|---|
| 5-th percentile | 674.59 |
| Q1 | 674.59 |
| Median | 674.59 |
| Q3 | 674.59 |
| 95-th percentile | 674.59 |
| Maximum | 1292.8 |
| Range | 618.25 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 30.328 |
|---|---|
| Coef of variation | 0.044818 |
| Kurtosis | 255.59 |
| Mean | 676.69 |
| MAD | 4.169 |
| Skewness | 15.665 |
| Sum | 2400900 |
| Variance | 919.76 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 674.59 | 3527 | 99.4% |
|
| 1127.13 | 1 | 0.0% |
|
| 1092.37 | 1 | 0.0% |
|
| 1203.1 | 1 | 0.0% |
|
| 1292.84 | 1 | 0.0% |
|
| 1169.42 | 1 | 0.0% |
|
| 1163.32 | 1 | 0.0% |
|
| 1042.46 | 1 | 0.0% |
|
| 874.32 | 1 | 0.0% |
|
| 710.81 | 1 | 0.0% |
|
| Other values (12) | 12 | 0.3% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 674.59 | 3527 | 99.4% |
|
| 710.81 | 1 | 0.0% |
|
| 736.88 | 1 | 0.0% |
|
| 745.7 | 1 | 0.0% |
|
| 788.17 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1169.42 | 1 | 0.0% |
|
| 1203.1 | 1 | 0.0% |
|
| 1231.57 | 1 | 0.0% |
|
| 1259.18 | 1 | 0.0% |
|
| 1292.84 | 1 | 0.0% |
|
| Value_SP500_REAL_PRICE_MONTH | Value_SP500_DIV_YIELD_MONTH | Value_SP500_PE_RATIO_MONTH | Value_SHILLER_PE_RATIO_MONTH | Value_SP500_EARNINGS_YIELD_MONTH | Value_SP500_INFLADJ_MONTH | Value_SP500_PSR_QUARTER | Value_SP500_DIV_MONTH | Value_SP500_DIV_YEAR | Value_SP500_DIV_GROWTH_YEAR | Value_SP500_DIV_GROWTH_QUARTER | Value_SP500_PBV_RATIO_QUARTER | Value_SHILLER_PE_RATIO_YEAR | Value_SP500_PE_RATIO_YEAR | Value_SP500_DIV_YIELD_YEAR | Value_SP500_PSR_YEAR | Value_SP500_EARNINGS_YIELD_YEAR | Value_SP500_PBV_RATIO_YEAR | Value_SP500_INFLADJ_YEAR | Value_SP500_SALES_YEAR | Value_SP500_SALES_GROWTH_YEAR | Value_SP500_SALES_QUARTER | Value_SP500_REAL_SALES_GROWTH_QUARTER | Value_SP500_SALES_GROWTH_QUARTER | Value_SP500_REAL_SALES_GROWTH_YEAR | Value_SP500_REAL_EARNINGS_GROWTH_YEAR | Value_SP500_REAL_SALES_YEAR | Value_SP500_REAL_EARNINGS_GROWTH_QUARTER | Value_SP500_EARNINGS_GROWTH_QUARTER | Value_SP500_REAL_SALES_QUARTER | Value_SP500_EARNINGS_MONTH | Value_SP500_BVPS_YEAR | Value_SP500_EARNINGS_YEAR | Value_SP500_EARNINGS_GROWTH_YEAR | Value_SP500_BVPS_QUARTER | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||||||||||||||||
| 1871-01-01 | 4.44 | 4.22 | 11.10 | 11.34 | 9.01 | 89.81 | 1.43 | 7.19 | 6.70 | -21.07 | 11.89 | 2.58 | 11.9 | 11.10 | 4.07 | 1.31 | 9.01 | 2.17 | 89.81 | 674.59 | -12.86 | 674.59 | -1.8 | -16.46 | -12.69 | -79.48 | 943.84 | 11.38 | 17.75 | 942.41 | 8.83 | 290.68 | 7.95 | -77.52 | 290.68 |
| 1871-01-31 | 4.37 | 5.86 | 15.61 | 11.34 | 5.62 | 99.41 | 1.43 | 5.26 | 5.15 | -21.07 | 11.89 | 2.58 | 11.9 | 11.82 | 5.86 | 1.31 | 5.33 | 2.17 | 81.79 | 674.59 | -12.86 | 674.59 | -1.8 | -16.46 | -12.69 | -79.48 | 943.84 | 11.38 | 17.75 | 942.41 | 8.09 | 290.68 | 7.92 | -77.52 | 290.68 |
| 1871-02-01 | 4.50 | 4.22 | 11.25 | 10.92 | 8.89 | 88.33 | 1.43 | 7.19 | 6.70 | -21.07 | 11.89 | 2.58 | 11.9 | 11.82 | 4.07 | 1.31 | 5.33 | 2.17 | 81.79 | 674.59 | -12.86 | 674.59 | -1.8 | -16.46 | -12.69 | -79.48 | 943.84 | 11.38 | 17.75 | 942.41 | 8.83 | 290.68 | 7.95 | -77.52 | 290.68 |
| 1871-02-28 | 4.37 | 5.78 | 15.61 | 11.34 | 5.62 | 99.41 | 1.43 | 5.10 | 6.70 | -21.07 | 11.89 | 2.58 | 11.9 | 11.82 | 4.07 | 1.31 | 5.33 | 2.17 | 81.79 | 674.59 | -12.86 | 674.59 | -1.8 | -16.46 | -12.69 | -79.48 | 943.84 | 11.38 | 17.75 | 942.41 | 7.85 | 290.68 | 7.95 | -77.52 | 290.68 |
| 1871-03-01 | 4.61 | 4.22 | 11.52 | 11.19 | 8.68 | 89.17 | 1.43 | 7.19 | 6.70 | -21.07 | 11.89 | 2.58 | 11.9 | 11.82 | 4.07 | 1.31 | 5.33 | 2.17 | 81.79 | 674.59 | -12.86 | 674.59 | -1.8 | -16.46 | -12.69 | -79.48 | 943.84 | 11.38 | 17.75 | 942.41 | 8.83 | 290.68 | 7.95 | -77.52 | 290.68 |
# Fill missing values by linear interpolation along the rows of each column.
# A new frame is returned (inplace=False), so `df` itself is left unchanged;
# limit_direction='both' lets NaN runs be filled in both directions.
df_interpolate = df.interpolate(
    method='linear',
    axis=0,
    inplace=False,
    limit_direction='both',
)
# Sanity check: show the imputed and interpolated frame shapes, one per line.
print(df_imputed.shape, df_interpolate.shape, sep='\n')
(3548, 35) (3548, 35)
# Build the full pandas-profiling report for the interpolated dataset.
# The call is kept as the bare, final expression of the cell so that the
# notebook renders the resulting report inline.
pandas_profiling.ProfileReport(df_interpolate)
Dataset info
| Number of variables | 36 |
|---|---|
| Number of observations | 3548 |
| Total Missing (%) | 0.0% |
| Total size in memory | 998.0 KiB |
| Average record size in memory | 288.0 B |
Variables types
| Numeric | 13 |
|---|---|
| Categorical | 0 |
| Boolean | 0 |
| Date | 1 |
| Text (Unique) | 0 |
| Rejected | 22 |
| Unsupported | 0 |
Warnings
Value_SHILLER_PE_RATIO_YEAR is highly correlated with Value_SHILLER_PE_RATIO_MONTH (ρ = 0.98989) Rejected
Value_SP500_BVPS_QUARTER is highly correlated with Value_SP500_BVPS_YEAR (ρ = 0.99959) Rejected
Value_SP500_BVPS_YEAR is highly correlated with Value_SP500_SALES_QUARTER (ρ = 0.97772) Rejected
Value_SP500_DIV_GROWTH_QUARTER is highly correlated with Value_SP500_DIV_GROWTH_YEAR (ρ = 0.99279) Rejected
Value_SP500_DIV_MONTH is highly correlated with Value_SP500_INFLADJ_MONTH (ρ = 0.91991) Rejected
Value_SP500_DIV_YEAR is highly correlated with Value_SP500_DIV_MONTH (ρ = 0.99956) Rejected
Value_SP500_DIV_YIELD_YEAR is highly correlated with Value_SP500_DIV_YIELD_MONTH (ρ = 0.97677) Rejected
Value_SP500_EARNINGS_GROWTH_QUARTER is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_QUARTER (ρ = 0.99925) Rejected
Value_SP500_EARNINGS_GROWTH_YEAR is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_YEAR (ρ = 0.99856) Rejected
Value_SP500_EARNINGS_MONTH is highly correlated with Value_SP500_INFLADJ_YEAR (ρ = 0.9169) Rejected
Value_SP500_EARNINGS_YEAR is highly correlated with Value_SP500_EARNINGS_MONTH (ρ = 0.99519) Rejected
Value_SP500_EARNINGS_YIELD_YEAR is highly correlated with Value_SP500_EARNINGS_YIELD_MONTH (ρ = 0.98185) Rejected
Value_SP500_INFLADJ_MONTH is highly correlated with Value_SP500_REAL_PRICE_MONTH (ρ = 0.9681) Rejected
Value_SP500_INFLADJ_YEAR is highly correlated with Value_SP500_DIV_YEAR (ρ = 0.92035) Rejected
Value_SP500_PBV_RATIO_YEAR is highly correlated with Value_SP500_PBV_RATIO_QUARTER (ρ = 0.99857) Rejected
Value_SP500_PSR_YEAR is highly correlated with Value_SP500_PSR_QUARTER (ρ = 0.98853) Rejected
Value_SP500_REAL_SALES_GROWTH_QUARTER is highly correlated with Value_SP500_SALES_GROWTH_YEAR (ρ = 0.9797) Rejected
Value_SP500_REAL_SALES_GROWTH_YEAR is highly correlated with Value_SP500_SALES_GROWTH_QUARTER (ρ = 0.96667) Rejected
Value_SP500_REAL_SALES_QUARTER is highly correlated with Value_SP500_REAL_SALES_YEAR (ρ = 0.99537) Rejected
Value_SP500_REAL_SALES_YEAR is highly correlated with Value_SP500_SALES_QUARTER (ρ = 0.90087) Rejected
Value_SP500_SALES_GROWTH_QUARTER is highly correlated with Value_SP500_REAL_SALES_GROWTH_QUARTER (ρ = 0.99017) Rejected
Value_SP500_SALES_QUARTER is highly correlated with Value_SP500_SALES_YEAR (ρ = 0.99943) Rejected
Date
Date
| Distinct count | 3548 |
|---|---|
| Unique (%) | 100.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Minimum | 1871-01-01 00:00:00 |
|---|---|
| Maximum | 2018-12-31 00:00:00 |
Value_SHILLER_PE_RATIO_MONTH
Numeric
| Distinct count | 2354 |
|---|---|
| Unique (%) | 66.3% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 16.578 |
|---|---|
| Minimum | 4.78 |
| Maximum | 44.19 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 4.78 |
|---|---|
| 5-th percentile | 7.9768 |
| Q1 | 11.62 |
| Median | 15.655 |
| Q3 | 20.152 |
| 95-th percentile | 28.102 |
| Maximum | 44.19 |
| Range | 39.41 |
| Interquartile range | 8.5325 |
Descriptive statistics
| Standard deviation | 6.676 |
|---|---|
| Coef of variation | 0.4027 |
| Kurtosis | 1.9619 |
| Mean | 16.578 |
| MAD | 5.1107 |
| Skewness | 1.1209 |
| Sum | 58820 |
| Variance | 44.57 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 13.8 | 7 | 0.2% |
|
| 11.34 | 7 | 0.2% |
|
| 17.65 | 7 | 0.2% |
|
| 15.23 | 7 | 0.2% |
|
| 17.82 | 7 | 0.2% |
|
| 12.43 | 6 | 0.2% |
|
| 10.91 | 6 | 0.2% |
|
| 11.64 | 6 | 0.2% |
|
| 15.27 | 6 | 0.2% |
|
| 18.07 | 6 | 0.2% |
|
| Other values (2344) | 3483 | 98.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 4.78 | 1 | 0.0% |
|
| 4.95 | 1 | 0.0% |
|
| 4.955 | 1 | 0.0% |
|
| 5.02 | 1 | 0.0% |
|
| 5.04 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 43.7 | 1 | 0.0% |
|
| 43.77 | 1 | 0.0% |
|
| 43.83 | 1 | 0.0% |
|
| 43.980000000000004 | 1 | 0.0% |
|
| 44.19 | 1 | 0.0% |
|
Value_SHILLER_PE_RATIO_YEAR
Highly correlated
This variable is highly correlated with Value_SHILLER_PE_RATIO_MONTH and should be ignored for analysis
| Correlation | 0.98989 |
|---|
Value_SP500_BVPS_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_BVPS_YEAR and should be ignored for analysis
| Correlation | 0.99959 |
|---|
Value_SP500_BVPS_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_SALES_QUARTER and should be ignored for analysis
| Correlation | 0.97772 |
|---|
Value_SP500_DIV_GROWTH_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_DIV_GROWTH_YEAR and should be ignored for analysis
| Correlation | 0.99279 |
|---|
Value_SP500_DIV_GROWTH_YEAR
Numeric
| Distinct count | 689 |
|---|---|
| Unique (%) | 19.4% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 11.924 |
|---|---|
| Minimum | -21.07 |
| Maximum | 18.25 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -21.07 |
|---|---|
| 5-th percentile | 2.3661 |
| Q1 | 13.38 |
| Median | 13.38 |
| Q3 | 13.38 |
| 95-th percentile | 13.38 |
| Maximum | 18.25 |
| Range | 39.32 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 4.2003 |
|---|---|
| Coef of variation | 0.35224 |
| Kurtosis | 15.155 |
| Mean | 11.924 |
| MAD | 2.5096 |
| Skewness | -3.5077 |
| Sum | 42308 |
| Variance | 17.643 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 13.38 | 2856 | 80.5% |
|
| 8.65 | 4 | 0.1% |
|
| 10.68 | 2 | 0.1% |
|
| 13.4225 | 1 | 0.0% |
|
| 4.31625 | 1 | 0.0% |
|
| 16.26 | 1 | 0.0% |
|
| 11.246666666666666 | 1 | 0.0% |
|
| 13.151666666666667 | 1 | 0.0% |
|
| 17.255000000000003 | 1 | 0.0% |
|
| 12.135 | 1 | 0.0% |
|
| Other values (679) | 679 | 19.1% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -21.07 | 1 | 0.0% |
|
| -20.131666666666668 | 1 | 0.0% |
|
| -20.09375 | 1 | 0.0% |
|
| -19.193333333333335 | 1 | 0.0% |
|
| -19.1175 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 17.989166666666666 | 1 | 0.0% |
|
| 18.00125 | 1 | 0.0% |
|
| 18.08416666666667 | 1 | 0.0% |
|
| 18.167083333333334 | 1 | 0.0% |
|
| 18.25 | 1 | 0.0% |
|
Value_SP500_DIV_MONTH
Highly correlated
This variable is highly correlated with Value_SP500_INFLADJ_MONTH and should be ignored for analysis
| Correlation | 0.91991 |
|---|
Value_SP500_DIV_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_DIV_MONTH and should be ignored for analysis
| Correlation | 0.99956 |
|---|
Value_SP500_DIV_YIELD_MONTH
Numeric
| Distinct count | 1295 |
|---|---|
| Unique (%) | 36.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 4.3469 |
|---|---|
| Minimum | 1.11 |
| Maximum | 13.84 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1.11 |
|---|---|
| 5-th percentile | 1.74 |
| Q1 | 3.1588 |
| Median | 4.285 |
| Q3 | 5.39 |
| 95-th percentile | 7.18 |
| Maximum | 13.84 |
| Range | 12.73 |
| Interquartile range | 2.2312 |
Descriptive statistics
| Standard deviation | 1.6983 |
|---|---|
| Coef of variation | 0.39069 |
| Kurtosis | 0.77793 |
| Mean | 4.3469 |
| MAD | 1.3423 |
| Skewness | 0.47674 |
| Sum | 15423 |
| Variance | 2.8842 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 5.18 | 15 | 0.4% |
|
| 5.01 | 13 | 0.4% |
|
| 1.94 | 12 | 0.3% |
|
| 1.76 | 12 | 0.3% |
|
| 5.22 | 12 | 0.3% |
|
| 3.7 | 11 | 0.3% |
|
| 4.22 | 11 | 0.3% |
|
| 4.92 | 10 | 0.3% |
|
| 2.96 | 10 | 0.3% |
|
| 3.49 | 10 | 0.3% |
|
| Other values (1285) | 3432 | 96.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.11 | 3 | 0.1% |
|
| 1.12 | 1 | 0.0% |
|
| 1.13 | 1 | 0.0% |
|
| 1.1349999999999998 | 1 | 0.0% |
|
| 1.14 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 12.46 | 1 | 0.0% |
|
| 12.64 | 1 | 0.0% |
|
| 13.15 | 1 | 0.0% |
|
| 13.24 | 1 | 0.0% |
|
| 13.84 | 1 | 0.0% |
|
Value_SP500_DIV_YIELD_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_DIV_YIELD_MONTH and should be ignored for analysis
| Correlation | 0.97677 |
|---|
Value_SP500_EARNINGS_GROWTH_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_QUARTER and should be ignored for analysis
| Correlation | 0.99925 |
|---|
Value_SP500_EARNINGS_GROWTH_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_REAL_EARNINGS_GROWTH_YEAR and should be ignored for analysis
| Correlation | 0.99856 |
|---|
Value_SP500_EARNINGS_MONTH
Highly correlated
This variable is highly correlated with Value_SP500_INFLADJ_YEAR and should be ignored for analysis
| Correlation | 0.9169 |
|---|
Value_SP500_EARNINGS_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_EARNINGS_MONTH and should be ignored for analysis
| Correlation | 0.99519 |
|---|
Value_SP500_EARNINGS_YIELD_MONTH
Numeric
| Distinct count | 1649 |
|---|---|
| Unique (%) | 46.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 7.3645 |
|---|---|
| Minimum | 0.81 |
| Maximum | 18.82 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 0.81 |
|---|---|
| 5-th percentile | 3.8938 |
| Q1 | 5.55 |
| Median | 6.78 |
| Q3 | 8.755 |
| 95-th percentile | 12.783 |
| Maximum | 18.82 |
| Range | 18.01 |
| Interquartile range | 3.205 |
Descriptive statistics
| Standard deviation | 2.7021 |
|---|---|
| Coef of variation | 0.36691 |
| Kurtosis | 1.1874 |
| Mean | 7.3645 |
| MAD | 2.0858 |
| Skewness | 0.96555 |
| Sum | 26129 |
| Variance | 7.3012 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 5.62 | 15 | 0.4% |
|
| 5.29 | 13 | 0.4% |
|
| 5.54 | 13 | 0.4% |
|
| 5.33 | 12 | 0.3% |
|
| 5.72 | 11 | 0.3% |
|
| 6.23 | 11 | 0.3% |
|
| 5.3 | 11 | 0.3% |
|
| 5.38 | 11 | 0.3% |
|
| 5.48 | 11 | 0.3% |
|
| 5.26 | 11 | 0.3% |
|
| Other values (1639) | 3429 | 96.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.81 | 3 | 0.1% |
|
| 0.8200000000000001 | 1 | 0.0% |
|
| 0.83 | 1 | 0.0% |
|
| 0.87 | 1 | 0.0% |
|
| 0.895 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 17.845 | 1 | 0.0% |
|
| 18.12 | 1 | 0.0% |
|
| 18.48 | 1 | 0.0% |
|
| 18.65 | 1 | 0.0% |
|
| 18.82 | 1 | 0.0% |
|
Value_SP500_EARNINGS_YIELD_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_EARNINGS_YIELD_MONTH and should be ignored for analysis
| Correlation | 0.98185 |
|---|
Value_SP500_INFLADJ_MONTH
Highly correlated
This variable is highly correlated with Value_SP500_REAL_PRICE_MONTH and should be ignored for analysis
| Correlation | 0.9681 |
|---|
Value_SP500_INFLADJ_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_DIV_YEAR and should be ignored for analysis
| Correlation | 0.92035 |
|---|
Value_SP500_PBV_RATIO_QUARTER
Numeric
| Distinct count | 333 |
|---|---|
| Unique (%) | 9.4% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 4.7592 |
|---|---|
| Minimum | 1.78 |
| Maximum | 5.06 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 1.78 |
|---|---|
| 5-th percentile | 2.6767 |
| Q1 | 5.05 |
| Median | 5.05 |
| Q3 | 5.05 |
| 95-th percentile | 5.05 |
| Maximum | 5.06 |
| Range | 3.28 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.79199 |
|---|---|
| Coef of variation | 0.16641 |
| Kurtosis | 4.4417 |
| Mean | 4.7592 |
| MAD | 0.50877 |
| Skewness | -2.475 |
| Sum | 16886 |
| Variance | 0.62724 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 5.05 | 3096 | 87.3% |
|
| 2.76 | 9 | 0.3% |
|
| 2.91 | 8 | 0.2% |
|
| 2.74 | 7 | 0.2% |
|
| 2.73 | 5 | 0.1% |
|
| 2.19 | 4 | 0.1% |
|
| 2.63 | 4 | 0.1% |
|
| 2.58 | 4 | 0.1% |
|
| 2.81 | 4 | 0.1% |
|
| 2.763333333333333 | 4 | 0.1% |
|
| Other values (323) | 403 | 11.4% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 1.78 | 1 | 0.0% |
|
| 1.7983333333333333 | 1 | 0.0% |
|
| 1.8166666666666667 | 2 | 0.1% |
|
| 1.835 | 1 | 0.0% |
|
| 1.85 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 5.053333333333333 | 1 | 0.0% |
|
| 5.055 | 1 | 0.0% |
|
| 5.056666666666667 | 1 | 0.0% |
|
| 5.058333333333333 | 1 | 0.0% |
|
| 5.06 | 1 | 0.0% |
|
Value_SP500_PBV_RATIO_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_PBV_RATIO_QUARTER and should be ignored for analysis
| Correlation | 0.99857 |
|---|
Value_SP500_PE_RATIO_MONTH
Numeric
| Distinct count | 2218 |
|---|---|
| Unique (%) | 62.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 15.731 |
|---|---|
| Minimum | 5.31 |
| Maximum | 123.73 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 5.31 |
|---|---|
| 5-th percentile | 7.8202 |
| Q1 | 11.429 |
| Median | 14.75 |
| Q3 | 18.03 |
| 95-th percentile | 25.691 |
| Maximum | 123.73 |
| Range | 118.42 |
| Interquartile range | 6.6013 |
Descriptive statistics
| Standard deviation | 8.3823 |
|---|---|
| Coef of variation | 0.53285 |
| Kurtosis | 67.083 |
| Mean | 15.731 |
| MAD | 4.6349 |
| Skewness | 6.4167 |
| Sum | 55814 |
| Variance | 70.263 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 15.61 | 8 | 0.2% |
|
| 12.21 | 7 | 0.2% |
|
| 9.84 | 7 | 0.2% |
|
| 10.34 | 7 | 0.2% |
|
| 14.51 | 6 | 0.2% |
|
| 11.48 | 6 | 0.2% |
|
| 14.75 | 6 | 0.2% |
|
| 10.98 | 6 | 0.2% |
|
| 19.0 | 6 | 0.2% |
|
| 15.11 | 5 | 0.1% |
|
| Other values (2208) | 3484 | 98.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 5.31 | 1 | 0.0% |
|
| 5.359999999999999 | 1 | 0.0% |
|
| 5.41 | 1 | 0.0% |
|
| 5.525 | 1 | 0.0% |
|
| 5.609999999999999 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 119.85 | 1 | 0.0% |
|
| 121.78999999999999 | 1 | 0.0% |
|
| 123.32 | 1 | 0.0% |
|
| 123.525 | 1 | 0.0% |
|
| 123.73 | 1 | 0.0% |
|
Value_SP500_PE_RATIO_YEAR
Numeric
| Distinct count | 3346 |
|---|---|
| Unique (%) | 94.3% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 15.741 |
|---|---|
| Minimum | 5.74 |
| Maximum | 70.91 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 5.74 |
|---|---|
| 5-th percentile | 8.0235 |
| Q1 | 11.653 |
| Median | 15.053 |
| Q3 | 18.011 |
| 95-th percentile | 25.639 |
| Maximum | 70.91 |
| Range | 65.17 |
| Interquartile range | 6.3581 |
Descriptive statistics
| Standard deviation | 6.6443 |
|---|---|
| Coef of variation | 0.42211 |
| Kurtosis | 14.325 |
| Mean | 15.741 |
| MAD | 4.3673 |
| Skewness | 2.7959 |
| Sum | 55848 |
| Variance | 44.147 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 18.08 | 3 | 0.1% |
|
| 17.496666666666666 | 3 | 0.1% |
|
| 15.59 | 3 | 0.1% |
|
| 11.82 | 3 | 0.1% |
|
| 14.835 | 3 | 0.1% |
|
| 17.45 | 3 | 0.1% |
|
| 16.85 | 3 | 0.1% |
|
| 17.21666666666667 | 3 | 0.1% |
|
| 16.3 | 3 | 0.1% |
|
| 12.95125 | 3 | 0.1% |
|
| Other values (3336) | 3518 | 99.2% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 5.74 | 1 | 0.0% |
|
| 5.765000000000001 | 1 | 0.0% |
|
| 5.79 | 1 | 0.0% |
|
| 5.815 | 1 | 0.0% |
|
| 5.832916666666667 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 66.72583333333333 | 1 | 0.0% |
|
| 66.78916666666666 | 1 | 0.0% |
|
| 68.81791666666666 | 1 | 0.0% |
|
| 68.84958333333333 | 1 | 0.0% |
|
| 70.91 | 1 | 0.0% |
|
Value_SP500_PSR_QUARTER
Numeric
| Distinct count | 300 |
|---|---|
| Unique (%) | 8.5% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 1.738 |
|---|---|
| Minimum | 0.8 |
| Maximum | 2.31 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 0.8 |
|---|---|
| 5-th percentile | 1.4339 |
| Q1 | 1.77 |
| Median | 1.77 |
| Q3 | 1.77 |
| 95-th percentile | 1.77 |
| Maximum | 2.31 |
| Range | 1.51 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 0.13545 |
|---|---|
| Coef of variation | 0.077933 |
| Kurtosis | 13.779 |
| Mean | 1.738 |
| MAD | 0.067544 |
| Skewness | -3.163 |
| Sum | 6166.4 |
| Variance | 0.018346 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 1.77 | 3122 | 88.0% |
|
| 1.44 | 10 | 0.3% |
|
| 1.66 | 7 | 0.2% |
|
| 1.52 | 7 | 0.2% |
|
| 2.1 | 7 | 0.2% |
|
| 1.43 | 6 | 0.2% |
|
| 1.46 | 6 | 0.2% |
|
| 1.3166666666666667 | 5 | 0.1% |
|
| 1.47 | 4 | 0.1% |
|
| 1.33 | 4 | 0.1% |
|
| Other values (290) | 370 | 10.4% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 0.8 | 1 | 0.0% |
|
| 0.8116666666666668 | 1 | 0.0% |
|
| 0.8233333333333334 | 1 | 0.0% |
|
| 0.8283333333333334 | 1 | 0.0% |
|
| 0.835 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2.19 | 1 | 0.0% |
|
| 2.205 | 1 | 0.0% |
|
| 2.25 | 1 | 0.0% |
|
| 2.2575000000000003 | 1 | 0.0% |
|
| 2.31 | 1 | 0.0% |
|
Value_SP500_PSR_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_PSR_QUARTER and should be ignored for analysis
| Correlation | 0.98853 |
|---|
Value_SP500_REAL_EARNINGS_GROWTH_QUARTER
Numeric
| Distinct count | 675 |
|---|---|
| Unique (%) | 19.0% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | -1.6836 |
|---|---|
| Minimum | -90.27 |
| Maximum | 905.56 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -90.27 |
|---|---|
| 5-th percentile | -7.94 |
| Q1 | -7.94 |
| Median | -7.94 |
| Q3 | -7.94 |
| 95-th percentile | 16.01 |
| Maximum | 905.56 |
| Range | 995.83 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 56.051 |
|---|---|
| Coef of variation | -33.292 |
| Kurtosis | 184.12 |
| Mean | -1.6836 |
| MAD | 13.005 |
| Skewness | 12.982 |
| Sum | -5973.4 |
| Variance | 3141.7 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| -7.94 | 2856 | 80.5% |
|
| 14.93 | 9 | 0.3% |
|
| 11.38 | 7 | 0.2% |
|
| -0.54 | 2 | 0.1% |
|
| -12.535 | 2 | 0.1% |
|
| -13.59 | 2 | 0.1% |
|
| 14.05 | 2 | 0.1% |
|
| 13.870000000000001 | 1 | 0.0% |
|
| -14.786666666666667 | 1 | 0.0% |
|
| -17.93999999999999 | 1 | 0.0% |
|
| Other values (665) | 665 | 18.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -90.27 | 1 | 0.0% |
|
| -89.69166666666666 | 1 | 0.0% |
|
| -89.11333333333333 | 1 | 0.0% |
|
| -88.535 | 1 | 0.0% |
|
| -88.47166666666666 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 882.12 | 1 | 0.0% |
|
| 887.98 | 1 | 0.0% |
|
| 893.8399999999999 | 1 | 0.0% |
|
| 899.6999999999999 | 1 | 0.0% |
|
| 905.56 | 1 | 0.0% |
|
Value_SP500_REAL_EARNINGS_GROWTH_YEAR
Numeric
| Distinct count | 685 |
|---|---|
| Unique (%) | 19.3% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | -3.8153 |
|---|---|
| Minimum | -79.48 |
| Maximum | 261.66 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -79.48 |
|---|---|
| 5-th percentile | -7.94 |
| Q1 | -7.94 |
| Median | -7.94 |
| Q3 | -7.94 |
| 95-th percentile | 16.192 |
| Maximum | 261.66 |
| Range | 341.14 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 20.964 |
|---|---|
| Coef of variation | -5.4947 |
| Kurtosis | 69.189 |
| Mean | -3.8153 |
| MAD | 8.4864 |
| Skewness | 7.1384 |
| Sum | -13537 |
| Variance | 439.48 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| -7.94 | 2856 | 80.5% |
|
| 14.93 | 9 | 0.3% |
|
| -15.020833333333332 | 1 | 0.0% |
|
| 4.810000000000002 | 1 | 0.0% |
|
| 12.56375 | 1 | 0.0% |
|
| -28.35375 | 1 | 0.0% |
|
| 22.085 | 1 | 0.0% |
|
| 34.3975 | 1 | 0.0% |
|
| -0.12124999999999997 | 1 | 0.0% |
|
| 7.221666666666667 | 1 | 0.0% |
|
| Other values (675) | 675 | 19.0% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -79.48 | 1 | 0.0% |
|
| -77.04541666666668 | 1 | 0.0% |
|
| -74.61083333333335 | 1 | 0.0% |
|
| -72.17625000000001 | 1 | 0.0% |
|
| -69.74166666666667 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 235.16750000000002 | 1 | 0.0% |
|
| 243.99833333333336 | 1 | 0.0% |
|
| 247.44583333333338 | 1 | 0.0% |
|
| 252.82916666666668 | 1 | 0.0% |
|
| 261.66 | 1 | 0.0% |
|
Value_SP500_REAL_PRICE_MONTH
Numeric
| Distinct count | 2827 |
|---|---|
| Unique (%) | 79.7% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 267.14 |
|---|---|
| Minimum | 2.73 |
| Maximum | 2789.8 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 2.73 |
|---|---|
| 5-th percentile | 4.4 |
| Q1 | 7.8138 |
| Median | 16.555 |
| Q3 | 128.36 |
| 95-th percentile | 1428.9 |
| Maximum | 2789.8 |
| Range | 2787.1 |
| Interquartile range | 120.55 |
Descriptive statistics
| Standard deviation | 533.7 |
|---|---|
| Coef of variation | 1.9978 |
| Kurtosis | 5.5084 |
| Mean | 267.14 |
| MAD | 366.21 |
| Skewness | 2.4362 |
| Sum | 947810 |
| Variance | 284830 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 7.68 | 11 | 0.3% |
|
| 5.18 | 9 | 0.3% |
|
| 4.37 | 9 | 0.3% |
|
| 4.46 | 9 | 0.3% |
|
| 2736.61 | 8 | 0.2% |
|
| 5.25 | 8 | 0.2% |
|
| 9.3 | 7 | 0.2% |
|
| 4.54 | 7 | 0.2% |
|
| 8.83 | 6 | 0.2% |
|
| 5.33 | 6 | 0.2% |
|
| Other values (2817) | 3468 | 97.7% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 2.73 | 1 | 0.0% |
|
| 2.79 | 1 | 0.0% |
|
| 2.835 | 1 | 0.0% |
|
| 2.85 | 1 | 0.0% |
|
| 2.94 | 3 | 0.1% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 2736.61 | 8 | 0.2% |
|
| 2745.48 | 1 | 0.0% |
|
| 2747.48 | 1 | 0.0% |
|
| 2754.35 | 1 | 0.0% |
|
| 2789.8 | 1 | 0.0% |
|
Value_SP500_REAL_SALES_GROWTH_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_SALES_GROWTH_YEAR and should be ignored for analysis
| Correlation | 0.9797 |
|---|
Value_SP500_REAL_SALES_GROWTH_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_SALES_GROWTH_QUARTER and should be ignored for analysis
| Correlation | 0.96667 |
|---|
Value_SP500_REAL_SALES_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_REAL_SALES_YEAR and should be ignored for analysis
| Correlation | 0.99537 |
|---|
Value_SP500_REAL_SALES_YEAR
Highly correlated
This variable is highly correlated with Value_SP500_SALES_QUARTER and should be ignored for analysis
| Correlation | 0.90087 |
|---|
Value_SP500_SALES_GROWTH_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_REAL_SALES_GROWTH_QUARTER and should be ignored for analysis
| Correlation | 0.99017 |
|---|
Value_SP500_SALES_GROWTH_YEAR
Numeric
| Distinct count | 394 |
|---|---|
| Unique (%) | 11.1% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | -0.64806 |
|---|---|
| Minimum | -12.86 |
| Maximum | 10.93 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | -12.86 |
|---|---|
| 5-th percentile | -1.18 |
| Q1 | -1.18 |
| Median | -1.18 |
| Q3 | -1.18 |
| 95-th percentile | 5.1897 |
| Maximum | 10.93 |
| Range | 23.79 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 2.4013 |
|---|---|
| Coef of variation | -3.7053 |
| Kurtosis | 11.202 |
| Mean | -0.64806 |
| MAD | 1.1652 |
| Skewness | 2.698 |
| Sum | -2299.3 |
| Variance | 5.766 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| -1.18 | 3144 | 88.6% |
|
| 9.03 | 9 | 0.3% |
|
| 3.92 | 2 | 0.1% |
|
| 2.62 | 2 | 0.1% |
|
| 3.68 | 2 | 0.1% |
|
| 8.94 | 1 | 0.0% |
|
| 6.684166666666667 | 1 | 0.0% |
|
| 10.266666666666666 | 1 | 0.0% |
|
| 1.44 | 1 | 0.0% |
|
| -2.6916666666666664 | 1 | 0.0% |
|
| Other values (384) | 384 | 10.8% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| -12.86 | 1 | 0.0% |
|
| -12.253333333333332 | 1 | 0.0% |
|
| -12.075 | 1 | 0.0% |
|
| -11.646666666666665 | 1 | 0.0% |
|
| -11.29 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 10.921666666666667 | 1 | 0.0% |
|
| 10.92375 | 1 | 0.0% |
|
| 10.925833333333333 | 1 | 0.0% |
|
| 10.927916666666667 | 1 | 0.0% |
|
| 10.93 | 1 | 0.0% |
|
Value_SP500_SALES_QUARTER
Highly correlated
This variable is highly correlated with Value_SP500_SALES_YEAR and should be ignored for analysis
| Correlation | 0.99943 |
|---|
Value_SP500_SALES_YEAR
Numeric
| Distinct count | 421 |
|---|---|
| Unique (%) | 11.9% |
| Missing (%) | 0.0% |
| Missing (n) | 0 |
| Infinite (%) | 0.0% |
| Infinite (n) | 0 |
| Mean | 773.69 |
|---|---|
| Minimum | 674.59 |
| Maximum | 1292.8 |
| Zeros (%) | 0.0% |
Quantile statistics
| Minimum | 674.59 |
|---|---|
| 5-th percentile | 745.7 |
| Q1 | 745.7 |
| Median | 745.7 |
| Q3 | 745.7 |
| 95-th percentile | 1036.8 |
| Maximum | 1292.8 |
| Range | 618.25 |
| Interquartile range | 0 |
Descriptive statistics
| Standard deviation | 96.243 |
|---|---|
| Coef of variation | 0.12439 |
| Kurtosis | 10.629 |
| Mean | 773.69 |
| MAD | 52.026 |
| Skewness | 3.3842 |
| Sum | 2745100 |
| Variance | 9262.8 |
| Memory size | 27.8 KiB |
| Value | Count | Frequency (%) | |
| 745.7 | 3120 | 87.9% |
|
| 1292.84 | 9 | 0.3% |
|
| 1172.2266666666667 | 1 | 0.0% |
|
| 791.7595833333332 | 1 | 0.0% |
|
| 1140.8675 | 1 | 0.0% |
|
| 870.7304166666668 | 1 | 0.0% |
|
| 1240.7733333333333 | 1 | 0.0% |
|
| 1236.1716666666666 | 1 | 0.0% |
|
| 966.465 | 1 | 0.0% |
|
| 775.2766666666666 | 1 | 0.0% |
|
| Other values (411) | 411 | 11.6% |
|
Minimum 5 values
| Value | Count | Frequency (%) | |
| 674.59 | 1 | 0.0% |
|
| 676.0991666666667 | 1 | 0.0% |
|
| 677.1854166666667 | 1 | 0.0% |
|
| 677.6083333333333 | 1 | 0.0% |
|
| 679.1175000000001 | 1 | 0.0% |
|
Maximum 5 values
| Value | Count | Frequency (%) | |
| 1270.4 | 1 | 0.0% |
|
| 1276.01 | 1 | 0.0% |
|
| 1281.62 | 1 | 0.0% |
|
| 1287.23 | 1 | 0.0% |
|
| 1292.84 | 9 | 0.3% |
|
| Value_SP500_REAL_PRICE_MONTH | Value_SP500_DIV_YIELD_MONTH | Value_SP500_PE_RATIO_MONTH | Value_SHILLER_PE_RATIO_MONTH | Value_SP500_EARNINGS_YIELD_MONTH | Value_SP500_INFLADJ_MONTH | Value_SP500_PSR_QUARTER | Value_SP500_DIV_MONTH | Value_SP500_DIV_YEAR | Value_SP500_DIV_GROWTH_YEAR | Value_SP500_DIV_GROWTH_QUARTER | Value_SP500_PBV_RATIO_QUARTER | Value_SHILLER_PE_RATIO_YEAR | Value_SP500_PE_RATIO_YEAR | Value_SP500_DIV_YIELD_YEAR | Value_SP500_PSR_YEAR | Value_SP500_EARNINGS_YIELD_YEAR | Value_SP500_PBV_RATIO_YEAR | Value_SP500_INFLADJ_YEAR | Value_SP500_SALES_YEAR | Value_SP500_SALES_GROWTH_YEAR | Value_SP500_SALES_QUARTER | Value_SP500_REAL_SALES_GROWTH_QUARTER | Value_SP500_SALES_GROWTH_QUARTER | Value_SP500_REAL_SALES_GROWTH_YEAR | Value_SP500_REAL_EARNINGS_GROWTH_YEAR | Value_SP500_REAL_SALES_YEAR | Value_SP500_REAL_EARNINGS_GROWTH_QUARTER | Value_SP500_EARNINGS_GROWTH_QUARTER | Value_SP500_REAL_SALES_QUARTER | Value_SP500_EARNINGS_MONTH | Value_SP500_BVPS_YEAR | Value_SP500_EARNINGS_YEAR | Value_SP500_EARNINGS_GROWTH_YEAR | Value_SP500_BVPS_QUARTER | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||||||||||||||||
| 1871-01-01 | 4.440 | 5.86 | 11.100 | 10.920 | 9.010 | 89.81 | 1.77 | 5.260 | 5.150000 | 13.38 | 13.38 | 5.05 | 11.9 | 11.100000 | 5.860000 | 1.77 | 9.010000 | 5.05 | 89.8100 | 745.7 | -1.18 | 745.7 | -3.66 | -1.18 | -3.66 | -7.94 | 1087.39 | -7.94 | -3.71 | 1087.39 | 8.090 | 290.68 | 7.920000 | -3.71 | 290.68 |
| 1871-01-31 | 4.470 | 5.86 | 11.175 | 10.920 | 8.950 | 89.07 | 1.77 | 5.260 | 5.150000 | 13.38 | 13.38 | 5.05 | 11.9 | 11.140417 | 5.860000 | 1.77 | 8.979583 | 5.05 | 90.1025 | 745.7 | -1.18 | 745.7 | -3.66 | -1.18 | -3.66 | -7.94 | 1087.39 | -7.94 | -3.71 | 1087.39 | 8.090 | 290.68 | 7.920000 | -3.71 | 290.68 |
| 1871-02-01 | 4.500 | 5.82 | 11.250 | 10.920 | 8.890 | 88.33 | 1.77 | 5.180 | 5.151364 | 13.38 | 13.38 | 5.05 | 11.9 | 11.180833 | 5.843182 | 1.77 | 8.949167 | 5.05 | 90.3950 | 745.7 | -1.18 | 745.7 | -3.66 | -1.18 | -3.66 | -7.94 | 1087.39 | -7.94 | -3.71 | 1087.39 | 7.970 | 290.68 | 7.922273 | -3.71 | 290.68 |
| 1871-02-28 | 4.555 | 5.78 | 11.385 | 11.055 | 8.785 | 88.75 | 1.77 | 5.100 | 5.152727 | 13.38 | 13.38 | 5.05 | 11.9 | 11.221250 | 5.826364 | 1.77 | 8.918750 | 5.05 | 90.6875 | 745.7 | -1.18 | 745.7 | -3.66 | -1.18 | -3.66 | -7.94 | 1087.39 | -7.94 | -3.71 | 1087.39 | 7.850 | 290.68 | 7.924545 | -3.71 | 290.68 |
| 1871-03-01 | 4.610 | 5.71 | 11.520 | 11.190 | 8.680 | 89.17 | 1.77 | 5.065 | 5.154091 | 13.38 | 13.38 | 5.05 | 11.9 | 11.261667 | 5.809545 | 1.77 | 8.888333 | 5.05 | 90.9800 | 745.7 | -1.18 | 745.7 | -3.66 | -1.18 | -3.66 | -7.94 | 1087.39 | -7.94 | -3.71 | 1087.39 | 7.795 | 290.68 | 7.926818 | -3.71 | 290.68 |
autocorrelation_plot(df_imputed)
plt.show()
autocorrelation_plot(df_interpolate)
plt.show()
#Correlation heatmap plot after imputation
plt.figure(figsize=(20,15))
sns.heatmap(df_imputed.corr(),annot=True,fmt='.2f',square=False)
<matplotlib.axes._subplots.AxesSubplot at 0x1e5333594e0>
#Correlation heatmap plot after interpolation
plt.figure(figsize=(20,15))
sns.heatmap(df_interpolate.corr(),annot=True,fmt='.2f',square=False)
<matplotlib.axes._subplots.AxesSubplot at 0x1e538499eb8>
#Check for missing values per column and create a graph
Column_missing_values(df_imputed)
Column_missing_values(df_interpolate)
No missing values in provided dataframe No missing values in provided dataframe
df_imputed.plot(figsize=(50,30),fontsize=30)
<matplotlib.axes._subplots.AxesSubplot at 0x1e535eb1e10>
df_interpolate.plot(figsize=(30,20),fontsize=20)
<matplotlib.axes._subplots.AxesSubplot at 0x1e537aaf518>
df.hist(figsize=(15,10))
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537DDC898>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537E114E0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537E34E80>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537E63908>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537E942E8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537E94320>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537EE6828>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537F172E8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537F3BD68>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537F69828>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537F992E8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537FBED68>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E537FED828>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53801E2E8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538043D68>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538621828>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5386502E8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538675D68>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5386A3828>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5386D62E8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5386F9D68>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5387254E0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53874BF60>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53877AA20>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538C294E0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538C51F60>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538C7FA20>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538CAF4E0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538CD7F60>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538D04A20>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538D334E0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538D58F60>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538D88A20>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538DB84E0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538DDDF60>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538E0CA20>]],
dtype=object)
df_imputed.hist(figsize=(30,20))
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E538E4FEB8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E539078240>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53AF996A0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5391A5048>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5391C2AC8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5391C2B00>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53920D048>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E539234A90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E539261550>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53928AFD0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5392B6A90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5392E8550>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53930DFD0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53933AA90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53936C550>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E539392FD0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5393BEA90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5393EF550>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E539416FD0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E539443A90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E539472550>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53949BFD0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5394C8A90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5394F7550>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53951DFD0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53954BA90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53957C550>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5395A2FD0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5395CE748>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5395FF208>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E539624C88>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E539651748>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E539686208>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5396AAC88>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E5396D7748>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E539709208>]],
dtype=object)
#Histogram plot of all variables in interpolated dataset
df_interpolate.hist(figsize=(30,20))
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BB8C390>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53B7148D0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BAD1748>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BC79C50>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BC99710>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BC99748>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BCE0C18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BD0F6D8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BD3C198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BD63C18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BD926D8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BDC1198>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BDE7C18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BE156D8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BE47198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BE6AC18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BE9A6D8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BECA198>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BEEEC18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BF1F6D8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BF4D198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BF75C18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BFA26D8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BFD2198>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53BFF5C18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53C0256D8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53C056198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53C07AC18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53C0AB6D8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53C0DB198>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53C0FFC18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53C12D6D8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53C15E198>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53C182C18>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53C1B16D8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x000001E53C1E5198>]],
dtype=object)
Linear interpolation of the missing values looks promising: it gives better correlation between the variables of the dataset, whereas imputation has not shown better correlation. I will use the interpolated dataframe (df_interpolate) for further analysis.
Save df, df_imputed and df_interpolate to CSV files.
#Write dfs to csv file.
# index=False leaves the dataframe index out of the written files.
# NOTE(review): the dataframe preview in this notebook shows 'Date' as the index, so the
# dates would not be stored in these CSVs - confirm this is intended.
df.to_csv('SandP500_Index_Master.csv',index=False)
df_imputed.to_csv('SandP500_Index_df_imputed.csv',index=False)
df_interpolate.to_csv('SandP500_Index_df_Interpolated.csv',index=False)
Apply StandardScaler() to the imputed df and interpolated df to normalize feature values.
# Apply feature scaling on all values to the entire numerical dataframe.define function
def Apply_Standard_Scaler(df):
    '''
    This function applies StandardScaler() to all columns/features of a given dataframe.

    IN  - df: a pandas dataframe; every column is passed to the scaler via df.values.
    OUT - df_scaled_features: dataframe of scaled features with the same index and
              columns as df.
          scaler: the StandardScaler() object that was fitted on df, so the caller can
              reuse it to transform() new data or inverse_transform() scaled values.
    '''
    scaler = StandardScaler()
    # Fit the very instance that is returned. Fitting a second, throwaway
    # StandardScaler() would hand the caller an unfitted object.
    scaled_features = scaler.fit_transform(df.values)
    df_scaled_features = pd.DataFrame(scaled_features, index=df.index, columns=df.columns)
    return df_scaled_features, scaler
print ('Number of columns present in imputed and interpolated datasets are : {} & {}'.format (
len(df_imputed.columns),len(df_interpolate.columns)))
Number of columns present in imputed and interpolated datasets are : 35 & 35
# Apply standard scaling function to both dfs and return scaled df.
df_scaled_features_imputed, scaler_df_imputed = Apply_Standard_Scaler(df_imputed)
df_scaled_features_interpolation, scaler_df_interpolate = Apply_Standard_Scaler(df_interpolate)
Apply PCA feature extraction analysis to find groups of features with highest and lowest variance.
#Helper function to draw a scree plot of the variance explained by each principal component
def scree_plot(pca):
    '''
    Draw a scree plot for a PCA: one bar per principal component showing its
    explained-variance ratio, overlaid with a line of the cumulative ratio.

    INPUT: pca - fitted scikit-learn PCA instance (explained_variance_ratio_ is read)
    OUTPUT:
        None
    '''
    ratios = pca.explained_variance_ratio_
    n_comp = len(ratios)
    positions = np.arange(n_comp)
    running_total = np.cumsum(ratios)

    plt.figure(figsize=(25, 10))
    ax = plt.subplot(111)
    ax.bar(positions, ratios)
    ax.plot(positions, running_total)

    # Label every bar with its percentage, cut to the first four characters of the number.
    for k in range(n_comp):
        pct_text = r"%s%%" % ((str(ratios[k]*100)[:4]))
        ax.annotate(pct_text, (positions[k]+0.2, ratios[k]), va="bottom", ha="center", fontsize=12)

    ax.xaxis.set_tick_params(width=0)
    ax.yaxis.set_tick_params(width=2, length=12)

    ax.set_xlabel("Principal Component")
    ax.set_ylabel("Variance Explained (%)")
    plt.title('Explained Variance Per Principal Component')
# Apply PCA to the data for all features
def Apply_PCA(df_scaled_features,n_components):
    '''
    Build a scikit-learn PCA with n_components, fit it on the scaled features and
    draw the scree plot of the fitted model through the 'scree_plot' function.

    IN -
        df_scaled_features - numerically scaled dataframe.
        n_components - number of components for the PCA feature analysis.
    Output - the fitted PCA object created with the provided n_components.
    '''
    fitted_pca = PCA(n_components)
    # Only the fitted model is kept; the transformed data returned here is not used.
    fitted_pca.fit_transform(df_scaled_features)
    scree_plot(fitted_pca)
    return fitted_pca
# Map weights for the first principal component to corresponding feature names
# and then print the linked values, sorted by weight.
def sorted_weights(pca, ix, dataset):
    """
    Pair the weights of one principal component with feature names, sort the pairs by
    weight and draw them as a bar chart.

    Input parameters-
      pca = fitted PCA model; pca.components_ is read.
      ix = zero-based index into pca.components_ (0 selects the first component).
      dataset = dataframe whose columns are the features the PCA was fitted on, in the
                same order. Weights and column names are paired positionally, so an
                extra or missing column shifts the names against the weights.
    Output- A list of (weight, feature_name) tuples sorted by ascending weight.
            Draws a bar chart with the feature names on the x axis and the weights as bars.
    """
    import warnings

    weights = pca.components_[ix]
    feature_names = dataset.keys().values
    if len(weights) != len(feature_names):
        # zip() below silently truncates to the shorter input; warn instead of failing so
        # existing callers keep working while the mislabelling becomes visible.
        warnings.warn(
            "sorted_weights: PCA component has {} weights but dataset has {} columns; "
            "feature names may not line up with the weights.".format(
                len(weights), len(feature_names)))

    a = sorted(zip(weights, feature_names), key=lambda tup: tup[0])
    sorted_values = [pair[0] for pair in a]
    sorted_names = [pair[1] for pair in a]

    # plt.figure (the pyplot function) makes a new current figure of the requested size;
    # plt.Figure is the bare class and would leave the chart on some other figure.
    plt.figure(figsize=(20, 15))
    ax = pd.Series(sorted_values).plot(kind='bar')
    ax.set_xticklabels(sorted_names)
    return a
#Plot the cluster of features with the highest (or lowest) weights
def Print_PCAfeatures_graph(df,a,n):
    '''
    Function to draw one line chart per selected PCA feature, stacked vertically in a
    single figure, and return the selected feature names.

    IN-
        df- dataframe holding the feature columns to plot (indexed by feature name).
        a - result from the 'sorted_weights' function: a list of (weight, feature_name)
            tuples sorted by ascending weight.
        n- number of features to plot. A negative value (-n) takes the last n entries of a
           (the largest weights); a positive value (n) takes the first n entries of a
           (the smallest, i.e. most negative, weights).
    OUT- list with the names of the selected features.
    '''
    names = [pair[1] for pair in a]
    # Slice the 1-D list from the end for negative n, from the start otherwise.
    groups = names[n:] if n < 0 else names[:n]

    # The figure has to exist before the subplots are drawn; creating it afterwards only
    # opens a second, empty figure and leaves the charts at the default size.
    plt.figure(figsize=(30,20))
    for position, group in enumerate(groups, start=1):
        plt.subplot(len(groups), 1, position)
        plt.plot(df[group].values)
        plt.title(group, y=0.5, loc='center')
    plt.show()
    return groups
Visualize PCA feature extraction and variance for imputed and interpolated dfs
#Apply PCA for all features except target output.
pca_imputed = Apply_PCA(df_scaled_features_imputed.drop(['Value_SP500_REAL_PRICE_MONTH'],axis=1),n_components=10)
#Apply PCA for all features except target output.
pca_interpolated = Apply_PCA(df_scaled_features_interpolation.drop(['Value_SP500_REAL_PRICE_MONTH'],axis=1),n_components=10)
Visualize weights of features for first group of extracted features with highest variance
# #Cluster of of features with highest variance
# def Print_PCAfeatures_graph(df,a,n):
# '''
# Function to print PCA features in line chart with top n lowest variance or top n highest variance for a dataframe.
# IN-
# df- dataframe on which PCA analysis was done.
# a - result from 'sorted_weights' function. A sequence of 2-d array with sorted weights of features.
# n- number of features required to be printed in the chart. Negative (-n) shall show features with maximum variance while
# positive n would show features with lowest variance.
# OUT- name of features from cluster a.
# '''
# groups = []
# for i in range(len(a)):
# groups.append(a[i][1])
# #Slice 1-D array appropriately
# if n < 0:
# groups = groups[n:]
# else:
# groups = groups[:n]
# #print(groups)
# i = 1
# for group in groups:
# plt.subplot(len(groups), 1, i)
# plt.plot(df[group].values)
# plt.title(group, y=0.5, loc='center')
# i += 1
# plt.figure(figsize=(30,20))
# plt.show()
# return groups
#List the feature weights of the principal component at index 1 from the PCA analysis.
#The target column is dropped first because pca_imputed was fitted without it; the column
#names must line up one-to-one with the component weights.
a = sorted_weights(pca_imputed,1,df_scaled_features_imputed.drop(['Value_SP500_REAL_PRICE_MONTH'],axis=1))
#print chart with top 5 features with maximum variance
Print_PCAfeatures_graph(df_imputed,a,-5)
<Figure size 3000x2000 with 0 Axes>
['Value_SHILLER_PE_RATIO_YEAR', 'Value_SP500_EARNINGS_YIELD_MONTH', 'Value_SP500_PE_RATIO_MONTH', 'Value_SP500_PBV_RATIO_YEAR', 'Value_SP500_PBV_RATIO_QUARTER']
#print chart with top 5 features with lowest variance
Print_PCAfeatures_graph(df_imputed,a,5)
<Figure size 3000x2000 with 0 Axes>
['Value_SHILLER_PE_RATIO_MONTH', 'Value_SP500_PSR_QUARTER', 'Value_SP500_REAL_SALES_QUARTER', 'Value_SP500_PSR_YEAR', 'Value_SP500_REAL_SALES_GROWTH_QUARTER']
#The target column is dropped because pca_interpolated was fitted without it; otherwise
#every feature name is shifted by one position against the component weights.
b = sorted_weights(pca_interpolated,1,df_scaled_features_interpolation.drop(['Value_SP500_REAL_PRICE_MONTH'],axis=1))
b
[(-0.3099270964617896, 'Value_SP500_DIV_GROWTH_YEAR'), (-0.304721029314339, 'Value_SP500_DIV_YEAR'), (-0.2470383278900263, 'Value_SP500_REAL_SALES_GROWTH_QUARTER'), (-0.2387458673708968, 'Value_SP500_SALES_YEAR'), (-0.23832544559286753, 'Value_SP500_SALES_QUARTER'), (-0.22862267710171633, 'Value_SP500_SALES_GROWTH_QUARTER'), (-0.2276975766969925, 'Value_SP500_EARNINGS_GROWTH_QUARTER'), (-0.22053633106204035, 'Value_SP500_REAL_EARNINGS_GROWTH_YEAR'), (-0.18570532863264608, 'Value_SHILLER_PE_RATIO_MONTH'), (-0.1803014408947123, 'Value_SP500_PSR_YEAR'), (-0.14675080518992648, 'Value_SP500_SALES_GROWTH_YEAR'), (-0.14261472452590387, 'Value_SP500_INFLADJ_YEAR'), (-0.13310173004335485, 'Value_SP500_PE_RATIO_YEAR'), (-0.13165915021671848, 'Value_SP500_REAL_PRICE_MONTH'), (-0.1275332154522863, 'Value_SP500_INFLADJ_MONTH'), (-0.11806250612104158, 'Value_SP500_EARNINGS_GROWTH_YEAR'), (-0.11776297826352891, 'Value_SP500_EARNINGS_MONTH'), (-0.1168244365989096, 'Value_SP500_DIV_YIELD_YEAR'), (-0.06950409932183269, 'Value_SP500_REAL_SALES_QUARTER'), (-0.05601635917739762, 'Value_SP500_BVPS_YEAR'), (-0.02714587682828222, 'Value_SP500_PSR_QUARTER'), (-0.02606010953354218, 'Value_SP500_DIV_MONTH'), (-0.007376927107058928, 'Value_SP500_DIV_GROWTH_QUARTER'), (-0.002314480823035312, 'Value_SP500_EARNINGS_YIELD_YEAR'), (0.02256896923088267, 'Value_SP500_EARNINGS_YIELD_MONTH'), (0.025265445913568784, 'Value_SP500_PBV_RATIO_YEAR'), (0.1255333992926099, 'Value_SP500_PE_RATIO_MONTH'), (0.1287789446050626, 'Value_SP500_PBV_RATIO_QUARTER'), (0.16520849173637447, 'Value_SP500_REAL_EARNINGS_GROWTH_QUARTER'), (0.16883681596348415, 'Value_SP500_REAL_SALES_YEAR'), (0.20153174127326595, 'Value_SHILLER_PE_RATIO_YEAR'), (0.2157723138756866, 'Value_SP500_EARNINGS_YEAR'), (0.21697622894808202, 'Value_SP500_DIV_YIELD_MONTH'), (0.2211050245648737, 'Value_SP500_REAL_SALES_GROWTH_YEAR')]
#print chart with the 6 features that have the largest positive weights (the last 6 entries of b, as listed in the cell output)
Print_PCAfeatures_graph(df_interpolate,b,-6)
<Figure size 3000x2000 with 0 Axes>
['Value_SP500_REAL_EARNINGS_GROWTH_QUARTER', 'Value_SP500_REAL_SALES_YEAR', 'Value_SHILLER_PE_RATIO_YEAR', 'Value_SP500_EARNINGS_YEAR', 'Value_SP500_DIV_YIELD_MONTH', 'Value_SP500_REAL_SALES_GROWTH_YEAR']
#print chart with the 8 features that have the most negative weights (the first 8 entries of b, as listed in the cell output)
Print_PCAfeatures_graph(df_interpolate,b,8)
<Figure size 3000x2000 with 0 Axes>
['Value_SP500_DIV_GROWTH_YEAR', 'Value_SP500_DIV_YEAR', 'Value_SP500_REAL_SALES_GROWTH_QUARTER', 'Value_SP500_SALES_YEAR', 'Value_SP500_SALES_QUARTER', 'Value_SP500_SALES_GROWTH_QUARTER', 'Value_SP500_EARNINGS_GROWTH_QUARTER', 'Value_SP500_REAL_EARNINGS_GROWTH_YEAR']
Split the dataset chronologically into 80% train and 20% test, because it is a time series. Keeping the rows in time order (no shuffling) avoids look-ahead bias.
def Create_Training_Test_Dataset(df,split_percent,Linear_regr):
    '''
    This function splits a time-ordered dataframe chronologically into training and test datasets.
    The first split_percent fraction of the rows becomes the training set and the remaining rows the
    test set. Rows are never shuffled, so no later observation ends up in the training data.

    IN-  df - a dataframe from which the training and test datasets are sliced. It must contain the
              target column 'Value_SP500_REAL_PRICE_MONTH'.
         split_percent - fraction of the rows (between 0 and 1) that goes to the training dataset.
         Linear_regr - flag that selects the layout of the returned data. Works for this project only.
              'False' (or the boolean False): LSTM layout. X is every column except the target and
                  Y is a one-column dataframe holding the target.
              anything else: linear regression layout. X and Y are both the target column as a Series.
    OUT- X_train, Y_train, X_test, Y_test
    '''
    target_col = 'Value_SP500_REAL_PRICE_MONTH'

    # Split the size based on rows; int() truncates, so any leftover row goes to the test set.
    train_size = int(len(df) * split_percent)
    test_size = len(df) - train_size
    print('Training and Test dataset is of size {} & {}'.format(train_size,test_size))

    # Slice the df into train and test df, keeping the original (chronological) row order.
    train = df.iloc[:train_size,:]
    test = df.iloc[train_size:,:]

    # Accept the boolean False as well as the string 'False' used by the existing callers.
    lstm_layout = Linear_regr is False or Linear_regr == 'False'

    if lstm_layout:
        # Features are all columns but the target. The target is selected by name so that it does
        # not have to be the first column of df.
        X_train = train.drop([target_col],axis=1)
        Y_train = train[[target_col]]
        X_test = test.drop([target_col],axis=1)
        Y_test = test[[target_col]]
    else:
        # train and test already have exactly train_size and test_size rows, so the whole column is
        # used on both sides. This keeps X and Y the same length in each split.
        X_train = train[target_col]
        Y_train = train[target_col]
        X_test = test[target_col]
        Y_test = test[target_col]

    print('Features size of X_train and training target Y_train shape is {} & {}'.format(X_train.shape,Y_train.shape))
    print('Features size of X_test and Test target Y_test shape is {} & {}'.format(X_test.shape,Y_test.shape))
    return X_train, Y_train, X_test, Y_test
def Convert_dataset_nparray(X_train, Y_train, X_test, Y_test):
    '''
    Turn the four train/test objects into np.array, the container the LSTM model works with,
    and report the resulting shapes.
    In- X_train, Y_train, X_test, Y_test
    OUT- np.array of X_train, Y_train, X_test, Y_test
    '''
    # Training pair first: features, then target.
    train_features, train_target = (np.array(part) for part in (X_train, Y_train))
    print('Training dataset is converted to np.array with size {} & {}'.format(train_features.shape,train_target.shape))

    # Then the test pair, in the same order.
    test_features, test_target = (np.array(part) for part in (X_test, Y_test))
    print('Test dataset is converted to np.array with size {} & {}'.format(test_features.shape,test_target.shape))

    return train_features, train_target, test_features, test_target
Multivariate time-series prediction
# from subprocess import check_output
# from keras.layers.core import Dense, Activation, Dropout
# from keras.layers.recurrent import LSTM
# from keras.layers.embeddings import Embedding
# from keras.models import Sequential
# from keras.layers import LSTM, CuDNNLSTM , BatchNormalization
# import tensorflow as tf
# from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
# import time
# from numpy import newaxis
# #Build Model
# model = Sequential()
# model.add(LSTM(input_dim=1,output_dim=50,return_sequences=True))
# model.add(Dropout(0.2))
# model.add(LSTM(100,return_sequences=False))
# model.add(Dropout(0.2))
# model.add(Dense(output_dim=1))
# model.add(Activation('linear'))
# start = time.time()
# model.compile(loss='mse', optimizer='rmsprop')
# print ('compilation time : ', time.time() - start)
# #Build the model
# model = Sequential()
# model.add(LSTM(256,input_shape=(2837,34)))
# model.add(Dense(1))
# model.compile(optimizer='adam',loss='mse')
# #Reshape data for (Sample,Timestep,Features)
# #X_train = X_train.reshape((X_train.shape[0],X_train.shape[1],1))
# #X_test = X_test.reshape((X_test.shape[0],X_test.shape[1],1))
# #Fit model with history to check for overfitting
# #history = model.fit(X_train,y_train,epochs=300,validation_data=(X_test,y_test),shuffle=False)
#model.fit(X_train,Y_train,batch_size=128,epochs=10,validation_split=0.05)
# model = Sequential([
# Dense(32, input_shape=(2837,34)),
# Activation('relu'),
# Dense(10),
# Activation('softmax'),
# ])
# #Compile
# # For a mean squared error regression problem
# model.compile(optimizer='rmsprop',
# loss='mse')
# Show the column names available in df_interpolate (the frame the model features are selected from).
df_interpolate.columns
Index(['Value_SP500_REAL_PRICE_MONTH', 'Value_SP500_DIV_YIELD_MONTH',
'Value_SP500_PE_RATIO_MONTH', 'Value_SHILLER_PE_RATIO_MONTH',
'Value_SP500_EARNINGS_YIELD_MONTH', 'Value_SP500_INFLADJ_MONTH',
'Value_SP500_PSR_QUARTER', 'Value_SP500_DIV_MONTH',
'Value_SP500_DIV_YEAR', 'Value_SP500_DIV_GROWTH_YEAR',
'Value_SP500_DIV_GROWTH_QUARTER', 'Value_SP500_PBV_RATIO_QUARTER',
'Value_SHILLER_PE_RATIO_YEAR', 'Value_SP500_PE_RATIO_YEAR',
'Value_SP500_DIV_YIELD_YEAR', 'Value_SP500_PSR_YEAR',
'Value_SP500_EARNINGS_YIELD_YEAR', 'Value_SP500_PBV_RATIO_YEAR',
'Value_SP500_INFLADJ_YEAR', 'Value_SP500_SALES_YEAR',
'Value_SP500_SALES_GROWTH_YEAR', 'Value_SP500_SALES_QUARTER',
'Value_SP500_REAL_SALES_GROWTH_QUARTER',
'Value_SP500_SALES_GROWTH_QUARTER',
'Value_SP500_REAL_SALES_GROWTH_YEAR',
'Value_SP500_REAL_EARNINGS_GROWTH_YEAR', 'Value_SP500_REAL_SALES_YEAR',
'Value_SP500_REAL_EARNINGS_GROWTH_QUARTER',
'Value_SP500_EARNINGS_GROWTH_QUARTER', 'Value_SP500_REAL_SALES_QUARTER',
'Value_SP500_EARNINGS_MONTH', 'Value_SP500_BVPS_YEAR',
'Value_SP500_EARNINGS_YEAR', 'Value_SP500_EARNINGS_GROWTH_YEAR',
'Value_SP500_BVPS_QUARTER'],
dtype='object')
#Not using the PCA features here because the model resulted in huge mse errors. The monthly features (plus a few quarterly
#ones) are used instead, on the expectation that they predict the real price better.
#NOTE: despite its name, df_PCA_features holds hand-picked raw columns of df_interpolate, not PCA components.
#The first column, 'Value_SP500_REAL_PRICE_MONTH', is the prediction target; the other 10 columns are the model inputs.
df_PCA_features = df_interpolate.loc[:,['Value_SP500_REAL_PRICE_MONTH',
'Value_SP500_DIV_YIELD_MONTH',
'Value_SP500_PE_RATIO_MONTH',
'Value_SHILLER_PE_RATIO_MONTH',
'Value_SP500_EARNINGS_YIELD_MONTH',
'Value_SP500_INFLADJ_MONTH',
'Value_SP500_EARNINGS_MONTH','Value_SP500_PSR_QUARTER','Value_SP500_SALES_QUARTER',
'Value_SP500_REAL_SALES_GROWTH_QUARTER','Value_SP500_REAL_EARNINGS_GROWTH_QUARTER']]
#Display (rows, columns) of the selection as a sanity check.
df_PCA_features.shape
(3548, 11)
#apply scaling: standardise every selected column (target included) with StandardScaler and keep the
#result as a dataframe with the same index and column names as df_PCA_features.
scaler_df_imputed = StandardScaler()
scaled_features = scaler_df_imputed.fit_transform(df_PCA_features.values)
df_scaled_features_interpol = pd.DataFrame(scaled_features,index=df_PCA_features.index,columns = df_PCA_features.columns)
#NOTE(review): the train/test split that follows is built from the unscaled df_PCA_features, not from
#df_scaled_features_interpol — confirm whether the scaled frame was meant to be used.
#df_scaled_features.describe()
#Use real data
#X_train, Y_train, X_test, Y_test = Create_Training_Test_Dataset(df=df_PCA_features, split_percent=0.95, Linear_regr='False')
#X_train, Y_train, X_test, Y_test = Convert_dataset_nparray(X_train, Y_train, X_test, Y_test)
#Split master dataframe into training and test datasets: chronological split with 98% of the rows used for training.
#Linear_regr='False' selects the LSTM layout (X = all feature columns, Y = target column).
X_train, Y_train, X_test, Y_test = Create_Training_Test_Dataset(df=df_PCA_features, split_percent=0.98, Linear_regr='False')
#Convert the four pandas objects to np.array before they are reshaped for the LSTM.
X_train, Y_train, X_test, Y_test = Convert_dataset_nparray(X_train, Y_train, X_test, Y_test)
Training and Test dataset is of size 3477 & 71 Features size of X_train and training target Y_train shape is (3477, 10) & (3477, 1) Features size of X_test and Test target Y_test shape is (71, 10) & (71, 1) Training dataset is converted to np.array with size (3477, 10) & (3477, 1) Test dataset is converted to np.array with size (71, 10) & (71, 1)
#Shape training data.
#the inputs (X) are reshaped into the 3D format expected by LSTM, namely [samples, timesteps, features]
#Each row becomes one sample with a single timestep. The feature count is read from the last axis instead
#of being hard-coded, so the cell follows the number of selected columns and can be re-run on
#arrays that are already 3D.
n_features = X_train.shape[-1]
X_train = X_train.reshape((X_train.shape[0],1,n_features))
#Y_train = Y_train.reshape((Y_train.shape[0],1,1)) #Do not reshape it.
print(X_train.shape,Y_train.shape)
print(X_train.shape[1],X_train.shape[2])
#Shape test dataset correctly for LSTM predict.
X_test = X_test.reshape((X_test.shape[0],1,X_test.shape[-1]))
#Y_test = Y_test.reshape((Y_test.shape[0],Y_test.shape[1],1))
print(X_test.shape,Y_test.shape)
print('X_test input shape : ', X_test.shape[0],X_test.shape[1],X_test.shape[2])
(3477, 1, 10) (3477, 1) 1 10 (71, 1, 10) (71, 1) X_test input shape : 71 1 10
# #Design network for mean squared error regression problem
# model = Sequential()
# model.add(LSTM(512, input_shape=(X_train.shape[1], X_train.shape[2])))
# model.add(Dropout(0.2))
# Activation('relu')
# model.add(Dense(1))
# Activation('softmax')
# # Compiling the model using mean square error loss, and Adam optimizer.
# model.compile(loss='mse', optimizer='adam',metrics=['accuracy'])
# #model.compile(loss='mse',optimizer='rmsprop',metrics=['accuracy'])
# max_features = 1024
# model = Sequential()
# model.add(Embedding(max_features, output_dim=256))
# model.add(LSTM(128))
# model.add(Dropout(0.5))
# model.add(Dense(1, activation='sigmoid'))
# model.compile(loss='binary_crossentropy',
# optimizer='rmsprop',
# metrics=['accuracy'])
#Design network for mean squared error regression problem
model = Sequential()
#The first dimension is supposed to be each sample.input should be (n_samples, timesteps, n_features)
#input_shape itself only lists (timesteps, n_features); the sample axis is left out.
model.add(LSTM(512, input_shape=(X_train.shape[1],X_train.shape[2])))
model.add(Dropout(0.2))
#model.add(BatchNormalization())
# model.add(LSTM(256, input_shape=(X_train.shape[0],1,X_train.shape[2]),return_sequences=True))
# model.add(Dropout(0.1))
# #model.add(BatchNormalization())
#NOTE: a bare Activation('relu') statement used to sit here. It built a layer without passing it to
#model.add(), so it never was part of the network; it is left out to keep the architecture unchanged.
model.add(Dense(1))
#Activation('softmax')
# Compiling the model using mean square error loss, and Adam optimizer.
#NOTE(review): accuracy is not informative for a regression target, but the metric is kept because the
#checkpoint file name template below is filled from val_acc.
model.compile(loss='mse', optimizer='adam',metrics=['accuracy'])
#model.compile(loss='mse',optimizer='rmsprop',metrics=['accuracy'])
filepath = "RNN_final-{epoch:02d}-{val_acc:.3f}"#unique file name which will include epochs and validation accuracy score.
#monitor and verbose are arguments of ModelCheckpoint, not of str.format (where they were silently ignored).
checkpoint = ModelCheckpoint("models/{}.model".format(filepath),monitor = 'val_acc',verbose = 1)
#summary is a method; calling it prints the layer table (printing the attribute only shows the bound method).
model.summary()
<bound method Network.summary of <keras.engine.sequential.Sequential object at 0x000001E54D6F5F98>>
# Fit the network for 500 epochs in batches of 100 samples.
# Validation runs on the held-out test arrays rather than on the training data
# itself: validating on (X_train, Y_train) only repeats the training loss and
# reveals nothing about generalisation. The training data, and therefore the
# fitted weights, are unaffected by this change.
# NOTE(review): if a checkpoint is later selected by its validation score,
# carve a separate validation split out of the training data so the test set
# stays untouched for the final evaluation.
# shuffle=False keeps the samples in their original order during training.
history = model.fit(
    X_train,
    Y_train,
    epochs=500,
    batch_size=100,
    validation_data=(X_test, Y_test),
    verbose=2,
    callbacks=[checkpoint],
    shuffle=False,
)
# Persist the final model, including optimizer state, overwriting any earlier save.
model.save('LSTM_model', overwrite=True, include_optimizer=True)
Train on 3477 samples, validate on 3477 samples Epoch 1/500 - 3s - loss: 243339.4370 - acc: 0.0000e+00 - val_loss: 242714.4353 - val_acc: 0.0000e+00 Epoch 2/500 - 1s - loss: 241162.7605 - acc: 0.0000e+00 - val_loss: 240804.1092 - val_acc: 2.8760e-04 Epoch 3/500 - 1s - loss: 239919.4807 - acc: 0.0000e+00 - val_loss: 239705.8956 - val_acc: 2.8760e-04 Epoch 4/500 - 1s - loss: 238994.8505 - acc: 0.0000e+00 - val_loss: 238805.9905 - val_acc: 0.0000e+00 Epoch 5/500 - 1s - loss: 238148.2302 - acc: 0.0000e+00 - val_loss: 237966.7018 - val_acc: 0.0000e+00 Epoch 6/500 - 1s - loss: 237272.5937 - acc: 0.0000e+00 - val_loss: 237097.1663 - val_acc: 0.0000e+00 Epoch 7/500 - 1s - loss: 236451.4622 - acc: 0.0000e+00 - val_loss: 236272.0884 - val_acc: 0.0000e+00 Epoch 8/500 - 1s - loss: 235675.5565 - acc: 2.8760e-04 - val_loss: 235472.4969 - val_acc: 2.8760e-04 Epoch 9/500 - 1s - loss: 234850.1869 - acc: 0.0000e+00 - val_loss: 234682.3712 - val_acc: 0.0000e+00 Epoch 10/500 - 1s - loss: 234135.3833 - acc: 2.8760e-04 - val_loss: 233926.5933 - val_acc: 0.0000e+00 Epoch 11/500 - 1s - loss: 233322.8948 - acc: 0.0000e+00 - val_loss: 233148.9700 - val_acc: 0.0000e+00 Epoch 12/500 - 1s - loss: 232576.8213 - acc: 0.0000e+00 - val_loss: 232353.4952 - val_acc: 0.0000e+00 Epoch 13/500 - 1s - loss: 231753.3019 - acc: 0.0000e+00 - val_loss: 231597.8444 - val_acc: 2.8760e-04 Epoch 14/500 - 1s - loss: 231032.9564 - acc: 2.8760e-04 - val_loss: 230852.7865 - val_acc: 0.0000e+00 Epoch 15/500 - 1s - loss: 230292.6349 - acc: 2.8760e-04 - val_loss: 230112.4808 - val_acc: 0.0000e+00 Epoch 16/500 - 1s - loss: 229526.3014 - acc: 0.0000e+00 - val_loss: 229379.9607 - val_acc: 0.0000e+00 Epoch 17/500 - 1s - loss: 228825.9301 - acc: 2.8760e-04 - val_loss: 228661.3356 - val_acc: 0.0000e+00 Epoch 18/500 - 2s - loss: 228118.5561 - acc: 0.0000e+00 - val_loss: 227955.8876 - val_acc: 0.0000e+00 Epoch 19/500 - 2s - loss: 227397.1509 - acc: 0.0000e+00 - val_loss: 227261.4097 - val_acc: 0.0000e+00 Epoch 20/500 - 2s - 
loss: 226751.8984 - acc: 0.0000e+00 - val_loss: 226573.6249 - val_acc: 0.0000e+00 Epoch 21/500 - 2s - loss: 226037.4590 - acc: 2.8760e-04 - val_loss: 225894.3047 - val_acc: 2.8760e-04 Epoch 22/500 - 2s - loss: 225569.3964 - acc: 0.0000e+00 - val_loss: 225315.5182 - val_acc: 0.0000e+00 Epoch 23/500 - 2s - loss: 224719.4619 - acc: 0.0000e+00 - val_loss: 224562.6777 - val_acc: 0.0000e+00 Epoch 24/500 - 2s - loss: 224061.0709 - acc: 0.0000e+00 - val_loss: 223905.0793 - val_acc: 0.0000e+00 Epoch 25/500 - 2s - loss: 223420.1020 - acc: 0.0000e+00 - val_loss: 223248.1206 - val_acc: 0.0000e+00 Epoch 26/500 - 1s - loss: 222702.2231 - acc: 0.0000e+00 - val_loss: 222581.2486 - val_acc: 0.0000e+00 Epoch 27/500 - 1s - loss: 222100.0577 - acc: 2.8760e-04 - val_loss: 221942.7014 - val_acc: 0.0000e+00 Epoch 28/500 - 2s - loss: 221483.1033 - acc: 2.8760e-04 - val_loss: 221305.8124 - val_acc: 2.8760e-04 Epoch 29/500 - 1s - loss: 221164.4045 - acc: 0.0000e+00 - val_loss: 220921.8952 - val_acc: 0.0000e+00 Epoch 30/500 - 1s - loss: 220465.0957 - acc: 0.0000e+00 - val_loss: 220299.0549 - val_acc: 0.0000e+00 Epoch 31/500 - 1s - loss: 219756.9181 - acc: 0.0000e+00 - val_loss: 219681.9905 - val_acc: 0.0000e+00 Epoch 32/500 - 2s - loss: 219298.7041 - acc: 2.8760e-04 - val_loss: 219062.2763 - val_acc: 0.0000e+00 Epoch 33/500 - 2s - loss: 218690.2736 - acc: 0.0000e+00 - val_loss: 218477.0263 - val_acc: 0.0000e+00 Epoch 34/500 - 2s - loss: 218257.6683 - acc: 0.0000e+00 - val_loss: 218160.7385 - val_acc: 0.0000e+00 Epoch 35/500 - 2s - loss: 217705.7209 - acc: 0.0000e+00 - val_loss: 217522.5719 - val_acc: 2.8760e-04 Epoch 36/500 - 2s - loss: 216778.8285 - acc: 0.0000e+00 - val_loss: 216696.9238 - val_acc: 0.0000e+00 Epoch 37/500 - 1s - loss: 216299.8198 - acc: 0.0000e+00 - val_loss: 216088.7995 - val_acc: 0.0000e+00 Epoch 38/500 - 2s - loss: 215750.9451 - acc: 2.8760e-04 - val_loss: 215506.2003 - val_acc: 0.0000e+00 Epoch 39/500 - 2s - loss: 215045.0029 - acc: 0.0000e+00 - val_loss: 214934.4999 - 
val_acc: 0.0000e+00 Epoch 40/500 - 2s - loss: 214524.8656 - acc: 0.0000e+00 - val_loss: 214361.9306 - val_acc: 0.0000e+00 Epoch 41/500 - 2s - loss: 213993.4389 - acc: 0.0000e+00 - val_loss: 213801.9505 - val_acc: 0.0000e+00 Epoch 42/500 - 2s - loss: 213371.5354 - acc: 0.0000e+00 - val_loss: 213238.8752 - val_acc: 0.0000e+00 Epoch 43/500 - 1s - loss: 212892.6655 - acc: 0.0000e+00 - val_loss: 212686.4150 - val_acc: 2.8760e-04 Epoch 44/500 - 1s - loss: 212655.3949 - acc: 0.0000e+00 - val_loss: 212486.1022 - val_acc: 2.8760e-04 Epoch 45/500 - 1s - loss: 211995.9674 - acc: 0.0000e+00 - val_loss: 211933.9086 - val_acc: 0.0000e+00 Epoch 46/500 - 1s - loss: 211515.5962 - acc: 0.0000e+00 - val_loss: 211393.3153 - val_acc: 0.0000e+00 Epoch 47/500 - 1s - loss: 210926.3843 - acc: 0.0000e+00 - val_loss: 210848.7520 - val_acc: 0.0000e+00 Epoch 48/500 - 1s - loss: 211180.3884 - acc: 0.0000e+00 - val_loss: 210646.3960 - val_acc: 0.0000e+00 Epoch 49/500 - 2s - loss: 209984.0191 - acc: 0.0000e+00 - val_loss: 209739.4114 - val_acc: 0.0000e+00 Epoch 50/500 - 2s - loss: 209357.7884 - acc: 0.0000e+00 - val_loss: 209214.0551 - val_acc: 0.0000e+00 Epoch 51/500 - 2s - loss: 208866.4859 - acc: 2.8760e-04 - val_loss: 208688.0837 - val_acc: 0.0000e+00 Epoch 52/500 - 2s - loss: 208274.7003 - acc: 2.8760e-04 - val_loss: 208169.3333 - val_acc: 0.0000e+00 Epoch 53/500 - 1s - loss: 207703.4854 - acc: 0.0000e+00 - val_loss: 207652.7864 - val_acc: 0.0000e+00 Epoch 54/500 - 2s - loss: 207361.9242 - acc: 0.0000e+00 - val_loss: 207144.4490 - val_acc: 0.0000e+00 Epoch 55/500 - 2s - loss: 206746.4984 - acc: 2.8760e-04 - val_loss: 206637.0927 - val_acc: 2.8760e-04 Epoch 56/500 - 2s - loss: 206777.9321 - acc: 2.8760e-04 - val_loss: 206598.0003 - val_acc: 2.8760e-04 Epoch 57/500 - 2s - loss: 206293.4617 - acc: 0.0000e+00 - val_loss: 206095.5555 - val_acc: 2.8760e-04 Epoch 58/500 - 2s - loss: 205751.3194 - acc: 0.0000e+00 - val_loss: 205602.5550 - val_acc: 5.7521e-04 Epoch 59/500 - 2s - loss: 205390.5106 - 
acc: 0.0000e+00 - val_loss: 205097.7249 - val_acc: 2.8760e-04 Epoch 60/500 - 2s - loss: 204818.5186 - acc: 2.8760e-04 - val_loss: 204596.0372 - val_acc: 0.0000e+00 Epoch 61/500 - 2s - loss: 204105.1385 - acc: 0.0000e+00 - val_loss: 204100.3282 - val_acc: 2.8760e-04 Epoch 62/500 - 2s - loss: 203824.3437 - acc: 2.8760e-04 - val_loss: 203621.5036 - val_acc: 2.8760e-04 Epoch 63/500 - 2s - loss: 203475.2630 - acc: 0.0000e+00 - val_loss: 203151.2190 - val_acc: 0.0000e+00 Epoch 64/500 - 2s - loss: 202766.9528 - acc: 0.0000e+00 - val_loss: 202667.7471 - val_acc: 5.7521e-04 Epoch 65/500 - 2s - loss: 202312.7902 - acc: 2.8760e-04 - val_loss: 202193.4177 - val_acc: 0.0000e+00 Epoch 66/500 - 2s - loss: 202356.4267 - acc: 0.0000e+00 - val_loss: 202120.2318 - val_acc: 2.8760e-04 Epoch 67/500 - 2s - loss: 201901.7359 - acc: 8.6281e-04 - val_loss: 201647.6772 - val_acc: 0.0000e+00 Epoch 68/500 - 2s - loss: 201351.1909 - acc: 2.8760e-04 - val_loss: 201172.3936 - val_acc: 0.0000e+00 Epoch 69/500 - 2s - loss: 200831.8690 - acc: 0.0000e+00 - val_loss: 200713.1985 - val_acc: 2.8760e-04 Epoch 70/500 - 2s - loss: 200514.9758 - acc: 0.0000e+00 - val_loss: 200233.3952 - val_acc: 0.0000e+00 Epoch 71/500 - 2s - loss: 199717.9855 - acc: 0.0000e+00 - val_loss: 199455.3688 - val_acc: 0.0000e+00 Epoch 72/500 - 2s - loss: 199304.5319 - acc: 0.0000e+00 - val_loss: 199000.3805 - val_acc: 0.0000e+00 Epoch 73/500 - 2s - loss: 198717.1111 - acc: 0.0000e+00 - val_loss: 198545.5219 - val_acc: 0.0000e+00 Epoch 74/500 - 2s - loss: 198383.9670 - acc: 0.0000e+00 - val_loss: 198091.4415 - val_acc: 0.0000e+00 Epoch 75/500 - 2s - loss: 197817.1290 - acc: 0.0000e+00 - val_loss: 197646.5827 - val_acc: 0.0000e+00 Epoch 76/500 - 2s - loss: 197381.3781 - acc: 0.0000e+00 - val_loss: 197202.6017 - val_acc: 0.0000e+00 Epoch 77/500 - 2s - loss: 196877.1992 - acc: 0.0000e+00 - val_loss: 196767.8716 - val_acc: 2.8760e-04 Epoch 78/500 - 2s - loss: 196467.1117 - acc: 0.0000e+00 - val_loss: 196334.3113 - val_acc: 0.0000e+00 
Epoch 79/500 - 2s - loss: 196177.3487 - acc: 0.0000e+00 - val_loss: 195903.0680 - val_acc: 0.0000e+00 Epoch 80/500 - 2s - loss: 195683.7580 - acc: 0.0000e+00 - val_loss: 195470.2095 - val_acc: 0.0000e+00 Epoch 81/500 - 2s - loss: 195422.5725 - acc: 0.0000e+00 - val_loss: 195038.8352 - val_acc: 0.0000e+00 Epoch 82/500 - 2s - loss: 194854.1778 - acc: 0.0000e+00 - val_loss: 194615.1241 - val_acc: 0.0000e+00 Epoch 83/500 - 2s - loss: 194343.1340 - acc: 0.0000e+00 - val_loss: 194201.8916 - val_acc: 0.0000e+00 Epoch 84/500 - 2s - loss: 193875.1634 - acc: 0.0000e+00 - val_loss: 193774.1580 - val_acc: 0.0000e+00 Epoch 85/500 - 2s - loss: 193553.8551 - acc: 0.0000e+00 - val_loss: 193356.4870 - val_acc: 0.0000e+00 Epoch 86/500 - 2s - loss: 193196.1364 - acc: 0.0000e+00 - val_loss: 192931.4692 - val_acc: 2.8760e-04 Epoch 87/500 - 2s - loss: 192823.6895 - acc: 0.0000e+00 - val_loss: 192527.4390 - val_acc: 2.8760e-04 Epoch 88/500 - 2s - loss: 192282.9051 - acc: 0.0000e+00 - val_loss: 192114.5593 - val_acc: 0.0000e+00 Epoch 89/500 - 2s - loss: 191857.8865 - acc: 0.0000e+00 - val_loss: 191707.0105 - val_acc: 0.0000e+00 Epoch 90/500 - 2s - loss: 191505.9370 - acc: 0.0000e+00 - val_loss: 191308.5476 - val_acc: 0.0000e+00 Epoch 91/500 - 2s - loss: 191114.9983 - acc: 0.0000e+00 - val_loss: 190898.5700 - val_acc: 0.0000e+00 Epoch 92/500 - 2s - loss: 190835.3250 - acc: 0.0000e+00 - val_loss: 190495.9148 - val_acc: 2.8760e-04 Epoch 93/500 - 2s - loss: 190249.8917 - acc: 0.0000e+00 - val_loss: 190095.8689 - val_acc: 2.8760e-04 Epoch 94/500 - 2s - loss: 190008.7924 - acc: 0.0000e+00 - val_loss: 189699.4791 - val_acc: 0.0000e+00 Epoch 95/500 - 2s - loss: 189464.0374 - acc: 0.0000e+00 - val_loss: 189301.9428 - val_acc: 2.8760e-04 Epoch 96/500 - 2s - loss: 189000.1108 - acc: 0.0000e+00 - val_loss: 188902.8807 - val_acc: 0.0000e+00 Epoch 97/500 - 2s - loss: 188524.3779 - acc: 0.0000e+00 - val_loss: 188509.1563 - val_acc: 0.0000e+00 Epoch 98/500 - 2s - loss: 188582.9736 - acc: 2.8760e-04 - 
val_loss: 187986.5844 - val_acc: 0.0000e+00 Epoch 99/500 - 2s - loss: 187823.3821 - acc: 0.0000e+00 - val_loss: 187656.2729 - val_acc: 0.0000e+00 Epoch 100/500 - 2s - loss: 187548.0945 - acc: 0.0000e+00 - val_loss: 187190.4434 - val_acc: 0.0000e+00 Epoch 101/500 - 2s - loss: 187084.9576 - acc: 0.0000e+00 - val_loss: 186809.6147 - val_acc: 0.0000e+00 Epoch 102/500 - 2s - loss: 187113.1767 - acc: 0.0000e+00 - val_loss: 186896.8456 - val_acc: 0.0000e+00 Epoch 103/500 - 2s - loss: 186742.1331 - acc: 0.0000e+00 - val_loss: 186507.7935 - val_acc: 0.0000e+00 Epoch 104/500 - 2s - loss: 186273.4697 - acc: 0.0000e+00 - val_loss: 186121.3310 - val_acc: 0.0000e+00 Epoch 105/500 - 2s - loss: 185940.0060 - acc: 0.0000e+00 - val_loss: 185735.9088 - val_acc: 0.0000e+00 Epoch 106/500 - 2s - loss: 185428.6046 - acc: 0.0000e+00 - val_loss: 185354.3255 - val_acc: 0.0000e+00 Epoch 107/500 - 2s - loss: 185234.9341 - acc: 0.0000e+00 - val_loss: 184976.3639 - val_acc: 0.0000e+00 Epoch 108/500 - 2s - loss: 184637.5837 - acc: 0.0000e+00 - val_loss: 184594.0943 - val_acc: 0.0000e+00 Epoch 109/500 - 2s - loss: 184418.0348 - acc: 0.0000e+00 - val_loss: 184215.0162 - val_acc: 0.0000e+00 Epoch 110/500 - 2s - loss: 184115.8884 - acc: 0.0000e+00 - val_loss: 183843.3211 - val_acc: 0.0000e+00 Epoch 111/500 - 2s - loss: 183652.7114 - acc: 2.8760e-04 - val_loss: 183502.9403 - val_acc: 0.0000e+00 Epoch 112/500 - 2s - loss: 183361.3243 - acc: 0.0000e+00 - val_loss: 183110.0005 - val_acc: 0.0000e+00 Epoch 113/500 - 2s - loss: 183245.8812 - acc: 0.0000e+00 - val_loss: 182742.8113 - val_acc: 0.0000e+00 Epoch 114/500 - 2s - loss: 182698.3783 - acc: 0.0000e+00 - val_loss: 182381.9065 - val_acc: 0.0000e+00 Epoch 115/500 - 2s - loss: 182178.5259 - acc: 0.0000e+00 - val_loss: 182016.0790 - val_acc: 0.0000e+00 Epoch 116/500 - 2s - loss: 182033.1843 - acc: 0.0000e+00 - val_loss: 181652.4819 - val_acc: 0.0000e+00 Epoch 117/500 - 2s - loss: 181861.9280 - acc: 0.0000e+00 - val_loss: 181299.4412 - val_acc: 0.0000e+00 
Epoch 118/500 - 2s - loss: 181190.8525 - acc: 0.0000e+00 - val_loss: 180932.9580 - val_acc: 0.0000e+00 Epoch 119/500 - 2s - loss: 180615.9637 - acc: 0.0000e+00 - val_loss: 180585.3063 - val_acc: 0.0000e+00 Epoch 120/500 - 2s - loss: 180458.7817 - acc: 2.8760e-04 - val_loss: 180233.3298 - val_acc: 0.0000e+00 Epoch 121/500 - 2s - loss: 180227.0807 - acc: 0.0000e+00 - val_loss: 179875.1620 - val_acc: 0.0000e+00 Epoch 122/500 - 2s - loss: 179992.1728 - acc: 0.0000e+00 - val_loss: 179522.5561 - val_acc: 0.0000e+00 Epoch 123/500 - 2s - loss: 179340.9631 - acc: 0.0000e+00 - val_loss: 179175.9631 - val_acc: 0.0000e+00 Epoch 124/500 - 2s - loss: 179107.5935 - acc: 0.0000e+00 - val_loss: 178836.4184 - val_acc: 0.0000e+00 Epoch 125/500 - 2s - loss: 178790.9279 - acc: 5.7521e-04 - val_loss: 178483.6317 - val_acc: 0.0000e+00 Epoch 126/500 - 2s - loss: 178481.6867 - acc: 0.0000e+00 - val_loss: 178148.8725 - val_acc: 0.0000e+00 Epoch 127/500 - 2s - loss: 178137.9250 - acc: 0.0000e+00 - val_loss: 177808.8997 - val_acc: 2.8760e-04 Epoch 128/500 - 2s - loss: 177878.5549 - acc: 0.0000e+00 - val_loss: 177465.5303 - val_acc: 0.0000e+00 Epoch 129/500 - 2s - loss: 177610.9472 - acc: 0.0000e+00 - val_loss: 177126.9104 - val_acc: 0.0000e+00 Epoch 130/500 - 2s - loss: 177079.4112 - acc: 0.0000e+00 - val_loss: 176788.9154 - val_acc: 0.0000e+00 Epoch 131/500 - 2s - loss: 176756.6301 - acc: 0.0000e+00 - val_loss: 176460.1091 - val_acc: 0.0000e+00 Epoch 132/500 - 2s - loss: 176592.4682 - acc: 0.0000e+00 - val_loss: 176115.6201 - val_acc: 0.0000e+00 Epoch 133/500 - 2s - loss: 176046.2411 - acc: 2.8760e-04 - val_loss: 175796.2173 - val_acc: 0.0000e+00 Epoch 134/500 - 2s - loss: 175742.8151 - acc: 0.0000e+00 - val_loss: 175466.4175 - val_acc: 2.8760e-04 Epoch 135/500 - 2s - loss: 175503.3632 - acc: 0.0000e+00 - val_loss: 175127.3169 - val_acc: 0.0000e+00 Epoch 136/500 - 2s - loss: 174907.9096 - acc: 0.0000e+00 - val_loss: 174805.7643 - val_acc: 0.0000e+00 Epoch 137/500 - 2s - loss: 174611.3577 - 
acc: 2.8760e-04 - val_loss: 174476.3127 - val_acc: 0.0000e+00 Epoch 138/500 - 2s - loss: 174609.3507 - acc: 2.8760e-04 - val_loss: 174153.6887 - val_acc: 0.0000e+00 Epoch 139/500 - 2s - loss: 174289.8259 - acc: 0.0000e+00 - val_loss: 173835.7960 - val_acc: 0.0000e+00 Epoch 140/500 - 2s - loss: 173853.6221 - acc: 0.0000e+00 - val_loss: 173517.1795 - val_acc: 0.0000e+00 Epoch 141/500 - 2s - loss: 173428.0288 - acc: 0.0000e+00 - val_loss: 173187.3412 - val_acc: 0.0000e+00 Epoch 142/500 - 2s - loss: 173124.6239 - acc: 0.0000e+00 - val_loss: 172866.8039 - val_acc: 0.0000e+00 Epoch 143/500 - 2s - loss: 172793.6353 - acc: 0.0000e+00 - val_loss: 172529.6396 - val_acc: 0.0000e+00 Epoch 144/500 - 2s - loss: 172523.0024 - acc: 0.0000e+00 - val_loss: 172232.5899 - val_acc: 0.0000e+00 Epoch 145/500 - 2s - loss: 172297.6709 - acc: 0.0000e+00 - val_loss: 171899.2591 - val_acc: 0.0000e+00 Epoch 146/500 - 2s - loss: 171976.2375 - acc: 0.0000e+00 - val_loss: 171572.7133 - val_acc: 0.0000e+00 Epoch 147/500 - 2s - loss: 171752.3841 - acc: 0.0000e+00 - val_loss: 171291.9471 - val_acc: 0.0000e+00 Epoch 148/500 - 2s - loss: 171162.3094 - acc: 0.0000e+00 - val_loss: 170975.3312 - val_acc: 0.0000e+00 Epoch 149/500 - 2s - loss: 170903.4530 - acc: 0.0000e+00 - val_loss: 170647.9497 - val_acc: 0.0000e+00 Epoch 150/500 - 2s - loss: 170708.6626 - acc: 0.0000e+00 - val_loss: 170361.4426 - val_acc: 0.0000e+00 Epoch 151/500 - 2s - loss: 170524.1417 - acc: 0.0000e+00 - val_loss: 170060.2050 - val_acc: 2.8760e-04 Epoch 152/500 - 2s - loss: 170242.3502 - acc: 2.8760e-04 - val_loss: 169728.2799 - val_acc: 0.0000e+00 Epoch 153/500 - 2s - loss: 169962.3401 - acc: 0.0000e+00 - val_loss: 169416.0171 - val_acc: 0.0000e+00 Epoch 154/500 - 2s - loss: 169702.7952 - acc: 0.0000e+00 - val_loss: 169155.4697 - val_acc: 2.8760e-04 Epoch 155/500 - 2s - loss: 169182.8902 - acc: 0.0000e+00 - val_loss: 168819.1732 - val_acc: 2.8760e-04 Epoch 156/500 - 2s - loss: 168995.0300 - acc: 2.8760e-04 - val_loss: 168523.8319 - 
val_acc: 2.8760e-04 Epoch 157/500 - 2s - loss: 168804.0758 - acc: 2.8760e-04 - val_loss: 168185.9636 - val_acc: 0.0000e+00 Epoch 158/500 - 2s - loss: 168191.5185 - acc: 0.0000e+00 - val_loss: 167904.0125 - val_acc: 0.0000e+00 Epoch 159/500 - 2s - loss: 168160.3843 - acc: 0.0000e+00 - val_loss: 167647.9119 - val_acc: 0.0000e+00 Epoch 160/500 - 2s - loss: 168010.0011 - acc: 0.0000e+00 - val_loss: 167345.5596 - val_acc: 0.0000e+00 Epoch 161/500 - 2s - loss: 167200.0176 - acc: 2.8760e-04 - val_loss: 167019.4958 - val_acc: 0.0000e+00 Epoch 162/500 - 2s - loss: 167286.1218 - acc: 0.0000e+00 - val_loss: 166635.0658 - val_acc: 0.0000e+00 Epoch 163/500 - 2s - loss: 167080.4452 - acc: 0.0000e+00 - val_loss: 166473.3401 - val_acc: 0.0000e+00 Epoch 164/500 - 2s - loss: 166466.0709 - acc: 0.0000e+00 - val_loss: 166136.9338 - val_acc: 2.8760e-04 Epoch 165/500 - 2s - loss: 166228.6813 - acc: 0.0000e+00 - val_loss: 165869.2517 - val_acc: 5.7521e-04 Epoch 166/500 - 2s - loss: 166097.4674 - acc: 0.0000e+00 - val_loss: 165572.6432 - val_acc: 2.8760e-04 Epoch 167/500 - 2s - loss: 165569.6036 - acc: 0.0000e+00 - val_loss: 165287.6017 - val_acc: 0.0000e+00 Epoch 168/500 - 2s - loss: 165623.5437 - acc: 0.0000e+00 - val_loss: 164953.8091 - val_acc: 2.8760e-04 Epoch 169/500 - 2s - loss: 165307.4956 - acc: 2.8760e-04 - val_loss: 164622.1161 - val_acc: 2.8760e-04 Epoch 170/500 - 2s - loss: 164783.2370 - acc: 0.0000e+00 - val_loss: 164341.8462 - val_acc: 0.0000e+00 Epoch 171/500 - 2s - loss: 164384.8154 - acc: 0.0000e+00 - val_loss: 164031.0053 - val_acc: 0.0000e+00 Epoch 172/500 - 2s - loss: 164162.9165 - acc: 0.0000e+00 - val_loss: 163689.1634 - val_acc: 0.0000e+00 Epoch 173/500 - 2s - loss: 164125.1459 - acc: 0.0000e+00 - val_loss: 163421.2106 - val_acc: 0.0000e+00 Epoch 174/500 - 2s - loss: 163615.3991 - acc: 0.0000e+00 - val_loss: 163097.5967 - val_acc: 0.0000e+00 Epoch 175/500 - 2s - loss: 163489.0387 - acc: 0.0000e+00 - val_loss: 162873.1600 - val_acc: 0.0000e+00 Epoch 176/500 - 2s - 
loss: 163155.9230 - acc: 0.0000e+00 - val_loss: 162581.9887 - val_acc: 0.0000e+00 Epoch 177/500 - 2s - loss: 162859.4050 - acc: 0.0000e+00 - val_loss: 162198.9720 - val_acc: 0.0000e+00 Epoch 178/500 - 2s - loss: 162879.7991 - acc: 0.0000e+00 - val_loss: 162051.9035 - val_acc: 0.0000e+00 Epoch 179/500 - 2s - loss: 162299.2808 - acc: 0.0000e+00 - val_loss: 161689.6037 - val_acc: 2.8760e-04 Epoch 180/500 - 2s - loss: 162125.8359 - acc: 0.0000e+00 - val_loss: 161388.0723 - val_acc: 2.8760e-04 Epoch 181/500 - 2s - loss: 161834.3630 - acc: 0.0000e+00 - val_loss: 161094.3696 - val_acc: 2.8760e-04 Epoch 182/500 - 2s - loss: 161552.4347 - acc: 0.0000e+00 - val_loss: 160935.1006 - val_acc: 0.0000e+00 Epoch 183/500 - 2s - loss: 161391.3173 - acc: 0.0000e+00 - val_loss: 160625.5333 - val_acc: 0.0000e+00 Epoch 184/500 - 2s - loss: 160741.8211 - acc: 0.0000e+00 - val_loss: 160230.8716 - val_acc: 0.0000e+00 Epoch 185/500 - 2s - loss: 160698.9086 - acc: 0.0000e+00 - val_loss: 159992.5158 - val_acc: 2.8760e-04 Epoch 186/500 - 2s - loss: 160452.7113 - acc: 0.0000e+00 - val_loss: 159711.3022 - val_acc: 2.8760e-04 Epoch 187/500 - 2s - loss: 160235.0938 - acc: 0.0000e+00 - val_loss: 159384.2852 - val_acc: 2.8760e-04 Epoch 188/500 - 2s - loss: 159797.2247 - acc: 2.8760e-04 - val_loss: 159102.7106 - val_acc: 0.0000e+00 Epoch 189/500 - 2s - loss: 159415.5979 - acc: 2.8760e-04 - val_loss: 158851.5803 - val_acc: 0.0000e+00 Epoch 190/500 - 2s - loss: 159382.6426 - acc: 0.0000e+00 - val_loss: 158572.6205 - val_acc: 0.0000e+00 Epoch 191/500 - 2s - loss: 158959.7986 - acc: 0.0000e+00 - val_loss: 158245.0662 - val_acc: 0.0000e+00 Epoch 192/500 - 2s - loss: 158587.9760 - acc: 2.8760e-04 - val_loss: 157980.4719 - val_acc: 0.0000e+00 Epoch 193/500 - 2s - loss: 158600.9266 - acc: 0.0000e+00 - val_loss: 157756.4615 - val_acc: 0.0000e+00 Epoch 194/500 - 2s - loss: 158078.9056 - acc: 0.0000e+00 - val_loss: 157390.7395 - val_acc: 0.0000e+00 Epoch 195/500 - 2s - loss: 157891.4568 - acc: 0.0000e+00 - 
val_loss: 157187.0537 - val_acc: 0.0000e+00 Epoch 196/500 - 2s - loss: 157615.6274 - acc: 0.0000e+00 - val_loss: 156915.5112 - val_acc: 0.0000e+00 Epoch 197/500 - 2s - loss: 157160.0082 - acc: 0.0000e+00 - val_loss: 156648.8380 - val_acc: 0.0000e+00 Epoch 198/500 - 3s - loss: 157200.2425 - acc: 0.0000e+00 - val_loss: 156421.0472 - val_acc: 0.0000e+00 Epoch 199/500 - 2s - loss: 156869.6605 - acc: 2.8760e-04 - val_loss: 156175.4612 - val_acc: 0.0000e+00 Epoch 200/500 - 2s - loss: 156672.3571 - acc: 0.0000e+00 - val_loss: 155890.2040 - val_acc: 0.0000e+00 Epoch 201/500 - 2s - loss: 156051.5479 - acc: 2.8760e-04 - val_loss: 155504.1347 - val_acc: 0.0000e+00 Epoch 202/500 - 2s - loss: 155901.3640 - acc: 0.0000e+00 - val_loss: 155323.2393 - val_acc: 0.0000e+00 Epoch 203/500 - 2s - loss: 155789.8756 - acc: 0.0000e+00 - val_loss: 155009.7290 - val_acc: 0.0000e+00 Epoch 204/500 - 2s - loss: 155466.7994 - acc: 0.0000e+00 - val_loss: 154763.4590 - val_acc: 0.0000e+00 Epoch 205/500 - 2s - loss: 155367.1725 - acc: 0.0000e+00 - val_loss: 154476.1140 - val_acc: 0.0000e+00 Epoch 206/500 - 2s - loss: 154999.4332 - acc: 2.8760e-04 - val_loss: 154220.2388 - val_acc: 0.0000e+00 Epoch 207/500 - 2s - loss: 154750.5173 - acc: 0.0000e+00 - val_loss: 153974.6749 - val_acc: 0.0000e+00 Epoch 208/500 - 2s - loss: 154418.7816 - acc: 0.0000e+00 - val_loss: 153694.9579 - val_acc: 0.0000e+00 Epoch 209/500 - 2s - loss: 153963.3343 - acc: 2.8760e-04 - val_loss: 153424.6389 - val_acc: 0.0000e+00 Epoch 210/500 - 2s - loss: 154237.0511 - acc: 2.8760e-04 - val_loss: 153219.9716 - val_acc: 0.0000e+00 Epoch 211/500 - 2s - loss: 153530.8117 - acc: 5.7521e-04 - val_loss: 152898.6119 - val_acc: 0.0000e+00 Epoch 212/500 - 2s - loss: 153115.9693 - acc: 2.8760e-04 - val_loss: 152613.0596 - val_acc: 0.0000e+00 Epoch 213/500 - 2s - loss: 153052.8169 - acc: 0.0000e+00 - val_loss: 152363.2343 - val_acc: 0.0000e+00 Epoch 214/500 - 2s - loss: 153112.2147 - acc: 0.0000e+00 - val_loss: 152118.4079 - val_acc: 
0.0000e+00 Epoch 215/500 - 2s - loss: 152293.8418 - acc: 0.0000e+00 - val_loss: 151860.1592 - val_acc: 0.0000e+00 Epoch 216/500 - 2s - loss: 152309.8142 - acc: 2.8760e-04 - val_loss: 151585.1005 - val_acc: 0.0000e+00 Epoch 217/500 - 2s - loss: 152003.4079 - acc: 0.0000e+00 - val_loss: 151318.5452 - val_acc: 0.0000e+00 Epoch 218/500 - 2s - loss: 151801.8089 - acc: 0.0000e+00 - val_loss: 151011.6174 - val_acc: 0.0000e+00 Epoch 219/500 - 2s - loss: 151493.6602 - acc: 0.0000e+00 - val_loss: 150879.5737 - val_acc: 0.0000e+00 Epoch 220/500 - 2s - loss: 151245.8471 - acc: 0.0000e+00 - val_loss: 150603.1636 - val_acc: 0.0000e+00 Epoch 221/500 - 2s - loss: 151178.9075 - acc: 0.0000e+00 - val_loss: 150353.0207 - val_acc: 0.0000e+00 Epoch 222/500 - 2s - loss: 150897.8741 - acc: 0.0000e+00 - val_loss: 150050.8866 - val_acc: 0.0000e+00 Epoch 223/500 - 2s - loss: 150456.2675 - acc: 0.0000e+00 - val_loss: 149789.9268 - val_acc: 0.0000e+00 Epoch 224/500 - 2s - loss: 149981.1968 - acc: 0.0000e+00 - val_loss: 149544.8592 - val_acc: 0.0000e+00 Epoch 225/500 - 2s - loss: 150066.3879 - acc: 0.0000e+00 - val_loss: 149230.6581 - val_acc: 0.0000e+00 Epoch 226/500 - 2s - loss: 149841.1425 - acc: 0.0000e+00 - val_loss: 149056.8353 - val_acc: 0.0000e+00 Epoch 227/500 - 2s - loss: 149628.7708 - acc: 0.0000e+00 - val_loss: 148798.5240 - val_acc: 0.0000e+00 Epoch 228/500 - 3s - loss: 149403.6729 - acc: 2.8760e-04 - val_loss: 148508.6356 - val_acc: 0.0000e+00 Epoch 229/500 - 2s - loss: 148956.9944 - acc: 0.0000e+00 - val_loss: 148288.1646 - val_acc: 0.0000e+00 Epoch 230/500 - 2s - loss: 148783.9153 - acc: 2.8760e-04 - val_loss: 147999.7455 - val_acc: 0.0000e+00 Epoch 231/500 - 2s - loss: 148449.5945 - acc: 0.0000e+00 - val_loss: 147765.9182 - val_acc: 0.0000e+00 Epoch 232/500 - 2s - loss: 148363.2919 - acc: 0.0000e+00 - val_loss: 147546.8759 - val_acc: 0.0000e+00 Epoch 233/500 - 2s - loss: 147776.0751 - acc: 0.0000e+00 - val_loss: 147245.9764 - val_acc: 0.0000e+00 Epoch 234/500 - 2s - loss: 
147665.7166 - acc: 0.0000e+00 - val_loss: 146982.8459 - val_acc: 0.0000e+00 Epoch 235/500 - 2s - loss: 147344.9490 - acc: 0.0000e+00 - val_loss: 146738.0363 - val_acc: 0.0000e+00 Epoch 236/500 - 2s - loss: 147255.7611 - acc: 0.0000e+00 - val_loss: 146477.6938 - val_acc: 0.0000e+00 Epoch 237/500 - 2s - loss: 146784.6851 - acc: 0.0000e+00 - val_loss: 146258.7967 - val_acc: 0.0000e+00 Epoch 238/500 - 2s - loss: 146617.4036 - acc: 2.8760e-04 - val_loss: 145986.0381 - val_acc: 0.0000e+00 Epoch 239/500 - 2s - loss: 146464.8262 - acc: 2.8760e-04 - val_loss: 145794.0066 - val_acc: 0.0000e+00 Epoch 240/500 - 2s - loss: 146127.2701 - acc: 0.0000e+00 - val_loss: 145522.6462 - val_acc: 0.0000e+00 Epoch 241/500 - 2s - loss: 145850.6584 - acc: 0.0000e+00 - val_loss: 145217.4114 - val_acc: 0.0000e+00 Epoch 242/500 - 2s - loss: 145499.3587 - acc: 0.0000e+00 - val_loss: 144984.3418 - val_acc: 0.0000e+00 Epoch 243/500 - 2s - loss: 145512.0066 - acc: 2.8760e-04 - val_loss: 144774.6197 - val_acc: 0.0000e+00 Epoch 244/500 - 2s - loss: 145126.7895 - acc: 0.0000e+00 - val_loss: 144492.1389 - val_acc: 0.0000e+00 Epoch 245/500 - 2s - loss: 145074.4676 - acc: 0.0000e+00 - val_loss: 144267.9684 - val_acc: 0.0000e+00 Epoch 246/500 - 2s - loss: 144660.4512 - acc: 0.0000e+00 - val_loss: 143979.6775 - val_acc: 2.8760e-04 Epoch 247/500 - 2s - loss: 144638.7024 - acc: 0.0000e+00 - val_loss: 143802.4433 - val_acc: 2.8760e-04 Epoch 248/500 - 2s - loss: 144454.3364 - acc: 0.0000e+00 - val_loss: 143505.5637 - val_acc: 2.8760e-04 Epoch 249/500 - 2s - loss: 144139.4769 - acc: 2.8760e-04 - val_loss: 143312.1368 - val_acc: 2.8760e-04 Epoch 250/500 - 2s - loss: 143674.0164 - acc: 0.0000e+00 - val_loss: 143054.1678 - val_acc: 2.8760e-04 Epoch 251/500 - 2s - loss: 143524.1866 - acc: 0.0000e+00 - val_loss: 142870.9143 - val_acc: 0.0000e+00 Epoch 252/500 - 2s - loss: 143225.3097 - acc: 0.0000e+00 - val_loss: 142587.4751 - val_acc: 0.0000e+00 Epoch 253/500 - 2s - loss: 142930.0695 - acc: 0.0000e+00 - val_loss: 
142342.4352 - val_acc: 0.0000e+00 Epoch 254/500 - 2s - loss: 142998.2570 - acc: 0.0000e+00 - val_loss: 142147.3379 - val_acc: 0.0000e+00 Epoch 255/500 - 2s - loss: 142605.9415 - acc: 0.0000e+00 - val_loss: 141887.8703 - val_acc: 0.0000e+00 Epoch 256/500 - 2s - loss: 142385.0274 - acc: 0.0000e+00 - val_loss: 141638.3684 - val_acc: 0.0000e+00 Epoch 257/500 - 2s - loss: 142104.4011 - acc: 2.8760e-04 - val_loss: 141385.3904 - val_acc: 0.0000e+00 Epoch 258/500 - 2s - loss: 142162.0529 - acc: 0.0000e+00 - val_loss: 141174.9069 - val_acc: 0.0000e+00 Epoch 259/500 - 2s - loss: 141958.3432 - acc: 2.8760e-04 - val_loss: 140969.4075 - val_acc: 0.0000e+00 Epoch 260/500 - 2s - loss: 141576.0149 - acc: 0.0000e+00 - val_loss: 140745.6025 - val_acc: 0.0000e+00 Epoch 261/500 - 2s - loss: 141045.0811 - acc: 0.0000e+00 - val_loss: 140424.7214 - val_acc: 0.0000e+00 Epoch 262/500 - 2s - loss: 141099.4340 - acc: 0.0000e+00 - val_loss: 140245.1252 - val_acc: 0.0000e+00 Epoch 263/500 - 2s - loss: 140779.1084 - acc: 0.0000e+00 - val_loss: 140008.7503 - val_acc: 0.0000e+00 Epoch 264/500 - 2s - loss: 140512.2265 - acc: 0.0000e+00 - val_loss: 139788.5445 - val_acc: 0.0000e+00 Epoch 265/500 - 2s - loss: 140775.8771 - acc: 0.0000e+00 - val_loss: 139569.0363 - val_acc: 0.0000e+00 Epoch 266/500 - 2s - loss: 140206.0051 - acc: 0.0000e+00 - val_loss: 139323.5103 - val_acc: 0.0000e+00 Epoch 267/500 - 2s - loss: 139714.1097 - acc: 0.0000e+00 - val_loss: 139044.7612 - val_acc: 0.0000e+00 Epoch 268/500 - 2s - loss: 139433.3812 - acc: 0.0000e+00 - val_loss: 138833.4457 - val_acc: 0.0000e+00 Epoch 269/500 - 2s - loss: 139084.2115 - acc: 0.0000e+00 - val_loss: 138590.7889 - val_acc: 0.0000e+00 Epoch 270/500 - 2s - loss: 138948.2853 - acc: 0.0000e+00 - val_loss: 138394.5507 - val_acc: 0.0000e+00 Epoch 271/500 - 2s - loss: 138977.0617 - acc: 0.0000e+00 - val_loss: 138223.1965 - val_acc: 0.0000e+00 Epoch 272/500 - 2s - loss: 138273.3202 - acc: 0.0000e+00 - val_loss: 137901.3763 - val_acc: 0.0000e+00 Epoch 
273/500 - 2s - loss: 138723.5378 - acc: 0.0000e+00 - val_loss: 137739.5425 - val_acc: 0.0000e+00 Epoch 274/500 - 2s - loss: 138305.8302 - acc: 0.0000e+00 - val_loss: 137421.3930 - val_acc: 0.0000e+00 Epoch 275/500 - 2s - loss: 137892.5265 - acc: 2.8760e-04 - val_loss: 137280.3918 - val_acc: 0.0000e+00 Epoch 276/500 - 2s - loss: 137678.6718 - acc: 0.0000e+00 - val_loss: 136917.0710 - val_acc: 0.0000e+00 Epoch 277/500 - 2s - loss: 138072.5832 - acc: 2.8760e-04 - val_loss: 136844.8789 - val_acc: 0.0000e+00 Epoch 278/500 - 2s - loss: 137260.6344 - acc: 0.0000e+00 - val_loss: 136619.5742 - val_acc: 0.0000e+00 Epoch 279/500 - 2s - loss: 137465.0830 - acc: 0.0000e+00 - val_loss: 136337.8390 - val_acc: 0.0000e+00 Epoch 280/500 - 2s - loss: 136639.5057 - acc: 0.0000e+00 - val_loss: 136066.2178 - val_acc: 0.0000e+00 Epoch 281/500 - 2s - loss: 136817.2178 - acc: 0.0000e+00 - val_loss: 135889.8101 - val_acc: 0.0000e+00 Epoch 282/500 - 2s - loss: 136469.6263 - acc: 0.0000e+00 - val_loss: 135631.8158 - val_acc: 0.0000e+00 Epoch 283/500 - 2s - loss: 135843.9206 - acc: 0.0000e+00 - val_loss: 135425.1382 - val_acc: 0.0000e+00 Epoch 284/500 - 2s - loss: 135961.8998 - acc: 2.8760e-04 - val_loss: 135245.8427 - val_acc: 0.0000e+00 Epoch 285/500 - 2s - loss: 135270.7461 - acc: 0.0000e+00 - val_loss: 134910.0718 - val_acc: 0.0000e+00 Epoch 286/500 - 2s - loss: 135498.9362 - acc: 0.0000e+00 - val_loss: 134770.0466 - val_acc: 0.0000e+00 Epoch 287/500 - 3s - loss: 134886.6966 - acc: 0.0000e+00 - val_loss: 134501.4029 - val_acc: 0.0000e+00 Epoch 288/500 - 2s - loss: 135099.5701 - acc: 0.0000e+00 - val_loss: 134277.0943 - val_acc: 0.0000e+00 Epoch 289/500 - 2s - loss: 135162.2676 - acc: 0.0000e+00 - val_loss: 134140.9472 - val_acc: 0.0000e+00 Epoch 290/500 - 2s - loss: 134555.0574 - acc: 0.0000e+00 - val_loss: 133855.0343 - val_acc: 0.0000e+00 Epoch 291/500 - 2s - loss: 134353.6519 - acc: 2.8760e-04 - val_loss: 133622.1077 - val_acc: 2.8760e-04 Epoch 292/500 - 3s - loss: 133992.2645 - acc: 
0.0000e+00 - val_loss: 133410.5673 - val_acc: 2.8760e-04 Epoch 293/500 - 2s - loss: 133847.2031 - acc: 0.0000e+00 - val_loss: 133155.7503 - val_acc: 2.8760e-04 Epoch 294/500 - 2s - loss: 133778.7678 - acc: 0.0000e+00 - val_loss: 132969.6993 - val_acc: 2.8760e-04 Epoch 295/500 - 2s - loss: 133383.6468 - acc: 0.0000e+00 - val_loss: 132734.5014 - val_acc: 2.8760e-04 Epoch 296/500 - 2s - loss: 133531.9903 - acc: 0.0000e+00 - val_loss: 132514.2400 - val_acc: 2.8760e-04 Epoch 297/500 - 2s - loss: 133053.1592 - acc: 0.0000e+00 - val_loss: 132297.4555 - val_acc: 2.8760e-04 Epoch 298/500 - 2s - loss: 133302.1895 - acc: 0.0000e+00 - val_loss: 132077.3451 - val_acc: 2.8760e-04 Epoch 299/500 - 2s - loss: 132689.0420 - acc: 0.0000e+00 - val_loss: 131857.6665 - val_acc: 2.8760e-04 Epoch 300/500 - 2s - loss: 132473.5216 - acc: 0.0000e+00 - val_loss: 131657.2425 - val_acc: 2.8760e-04 Epoch 301/500 - 2s - loss: 132070.9815 - acc: 0.0000e+00 - val_loss: 131403.7001 - val_acc: 2.8760e-04 Epoch 302/500 - 2s - loss: 132391.9094 - acc: 0.0000e+00 - val_loss: 131229.8746 - val_acc: 2.8760e-04 Epoch 303/500 - 2s - loss: 131707.0519 - acc: 0.0000e+00 - val_loss: 130998.9775 - val_acc: 2.8760e-04 Epoch 304/500 - 2s - loss: 131770.6114 - acc: 0.0000e+00 - val_loss: 130860.9783 - val_acc: 2.8760e-04 Epoch 305/500 - 2s - loss: 131316.9740 - acc: 0.0000e+00 - val_loss: 130569.3525 - val_acc: 2.8760e-04 Epoch 306/500 - 2s - loss: 131226.9930 - acc: 0.0000e+00 - val_loss: 130390.5280 - val_acc: 2.8760e-04 Epoch 307/500 - 2s - loss: 131082.0830 - acc: 0.0000e+00 - val_loss: 130142.1672 - val_acc: 2.8760e-04 Epoch 308/500 - 2s - loss: 130818.9197 - acc: 0.0000e+00 - val_loss: 129940.8789 - val_acc: 2.8760e-04 Epoch 309/500 - 2s - loss: 130634.6874 - acc: 0.0000e+00 - val_loss: 129719.4185 - val_acc: 0.0000e+00 Epoch 310/500 - 2s - loss: 130407.1236 - acc: 0.0000e+00 - val_loss: 129534.5963 - val_acc: 0.0000e+00 Epoch 311/500 - 2s - loss: 130050.6562 - acc: 2.8760e-04 - val_loss: 129318.2759 - 
val_acc: 0.0000e+00 Epoch 312/500 - 2s - loss: 129741.3528 - acc: 0.0000e+00 - val_loss: 129077.4203 - val_acc: 0.0000e+00 Epoch 313/500 - 2s - loss: 129648.2650 - acc: 2.8760e-04 - val_loss: 128897.3552 - val_acc: 0.0000e+00 Epoch 314/500 - 2s - loss: 129079.6248 - acc: 0.0000e+00 - val_loss: 128679.0532 - val_acc: 0.0000e+00 Epoch 315/500 - 2s - loss: 129015.5273 - acc: 0.0000e+00 - val_loss: 128445.0817 - val_acc: 0.0000e+00 Epoch 316/500 - 2s - loss: 128854.6595 - acc: 0.0000e+00 - val_loss: 128233.0038 - val_acc: 0.0000e+00 Epoch 317/500 - 2s - loss: 128528.2127 - acc: 2.8760e-04 - val_loss: 128067.1797 - val_acc: 0.0000e+00 Epoch 318/500 - 3s - loss: 128362.8472 - acc: 0.0000e+00 - val_loss: 127800.0737 - val_acc: 0.0000e+00 Epoch 319/500 - 2s - loss: 128830.5754 - acc: 0.0000e+00 - val_loss: 127632.3559 - val_acc: 0.0000e+00 Epoch 320/500 - 2s - loss: 127922.3702 - acc: 0.0000e+00 - val_loss: 127393.0715 - val_acc: 0.0000e+00 Epoch 321/500 - 2s - loss: 128191.6115 - acc: 0.0000e+00 - val_loss: 127212.7134 - val_acc: 0.0000e+00 Epoch 322/500 - 2s - loss: 127737.3787 - acc: 0.0000e+00 - val_loss: 127005.5230 - val_acc: 0.0000e+00 Epoch 323/500 - 2s - loss: 127223.6184 - acc: 0.0000e+00 - val_loss: 126800.6605 - val_acc: 0.0000e+00 Epoch 324/500 - 2s - loss: 127267.6568 - acc: 0.0000e+00 - val_loss: 126647.8408 - val_acc: 0.0000e+00 Epoch 325/500 - 2s - loss: 127213.1606 - acc: 0.0000e+00 - val_loss: 126412.9828 - val_acc: 0.0000e+00 Epoch 326/500 - 2s - loss: 126879.9754 - acc: 0.0000e+00 - val_loss: 126213.7384 - val_acc: 0.0000e+00 Epoch 327/500 - 2s - loss: 127034.6654 - acc: 0.0000e+00 - val_loss: 126006.6070 - val_acc: 0.0000e+00 Epoch 328/500 - 2s - loss: 126579.0483 - acc: 0.0000e+00 - val_loss: 125799.9845 - val_acc: 0.0000e+00 Epoch 329/500 - 2s - loss: 126431.9550 - acc: 0.0000e+00 - val_loss: 125591.3756 - val_acc: 0.0000e+00 Epoch 330/500 - 2s - loss: 126025.4994 - acc: 0.0000e+00 - val_loss: 125358.1238 - val_acc: 0.0000e+00 Epoch 331/500 - 2s - 
loss: 126414.0748 - acc: 0.0000e+00 - val_loss: 125422.7192 - val_acc: 0.0000e+00 Epoch 332/500 - 2s - loss: 125656.5781 - acc: 2.8760e-04 - val_loss: 125285.3505 - val_acc: 0.0000e+00 Epoch 333/500 - 2s - loss: 125506.2421 - acc: 0.0000e+00 - val_loss: 125072.9172 - val_acc: 0.0000e+00 Epoch 334/500 - 2s - loss: 125291.1552 - acc: 0.0000e+00 - val_loss: 124818.9148 - val_acc: 0.0000e+00 Epoch 335/500 - 2s - loss: 125665.7994 - acc: 0.0000e+00 - val_loss: 124656.9003 - val_acc: 0.0000e+00 Epoch 336/500 - 2s - loss: 125133.5321 - acc: 0.0000e+00 - val_loss: 124464.1303 - val_acc: 2.8760e-04 Epoch 337/500 - 2s - loss: 125129.6220 - acc: 0.0000e+00 - val_loss: 124254.1999 - val_acc: 2.8760e-04 Epoch 338/500 - 2s - loss: 125087.9206 - acc: 2.8760e-04 - val_loss: 124007.1367 - val_acc: 2.8760e-04 Epoch 339/500 - 2s - loss: 124544.2235 - acc: 0.0000e+00 - val_loss: 123795.1931 - val_acc: 2.8760e-04 Epoch 340/500 - 2s - loss: 124476.6017 - acc: 2.8760e-04 - val_loss: 123546.4458 - val_acc: 2.8760e-04 Epoch 341/500 - 2s - loss: 124409.0815 - acc: 0.0000e+00 - val_loss: 123435.3971 - val_acc: 2.8760e-04 Epoch 342/500 - 2s - loss: 124222.2809 - acc: 0.0000e+00 - val_loss: 123210.3945 - val_acc: 2.8760e-04 Epoch 343/500 - 2s - loss: 123483.9855 - acc: 0.0000e+00 - val_loss: 123004.7957 - val_acc: 2.8760e-04 Epoch 344/500 - 2s - loss: 123237.5395 - acc: 0.0000e+00 - val_loss: 122784.7676 - val_acc: 2.8760e-04 Epoch 345/500 - 2s - loss: 123495.0847 - acc: 2.8760e-04 - val_loss: 122589.2223 - val_acc: 2.8760e-04 Epoch 346/500 - 2s - loss: 123176.3679 - acc: 0.0000e+00 - val_loss: 122412.7800 - val_acc: 2.8760e-04 Epoch 347/500 - 2s - loss: 122947.2946 - acc: 0.0000e+00 - val_loss: 122149.0238 - val_acc: 2.8760e-04 Epoch 348/500 - 2s - loss: 122607.1946 - acc: 0.0000e+00 - val_loss: 121988.6308 - val_acc: 2.8760e-04 Epoch 349/500 - 2s - loss: 123112.6624 - acc: 0.0000e+00 - val_loss: 121744.0461 - val_acc: 2.8760e-04 Epoch 350/500 - 3s - loss: 122248.9178 - acc: 0.0000e+00 - 
val_loss: 121546.0285 - val_acc: 2.8760e-04 Epoch 351/500 - 3s - loss: 122417.9495 - acc: 0.0000e+00 - val_loss: 121388.6294 - val_acc: 2.8760e-04 Epoch 352/500 - 2s - loss: 121990.3980 - acc: 0.0000e+00 - val_loss: 121189.6649 - val_acc: 2.8760e-04 Epoch 353/500 - 2s - loss: 121513.1515 - acc: 0.0000e+00 - val_loss: 120950.4475 - val_acc: 2.8760e-04 Epoch 354/500 - 2s - loss: 121628.1951 - acc: 0.0000e+00 - val_loss: 120770.8232 - val_acc: 2.8760e-04 Epoch 355/500 - 2s - loss: 121141.6633 - acc: 0.0000e+00 - val_loss: 120577.8369 - val_acc: 2.8760e-04 Epoch 356/500 - 2s - loss: 120797.2963 - acc: 0.0000e+00 - val_loss: 120368.8996 - val_acc: 2.8760e-04 Epoch 357/500 - 2s - loss: 121282.4175 - acc: 2.8760e-04 - val_loss: 120169.9084 - val_acc: 2.8760e-04 Epoch 358/500 - 2s - loss: 120809.6676 - acc: 0.0000e+00 - val_loss: 119981.9620 - val_acc: 2.8760e-04 Epoch 359/500 - 2s - loss: 120432.3057 - acc: 0.0000e+00 - val_loss: 119819.1681 - val_acc: 2.8760e-04 Epoch 360/500 - 2s - loss: 120271.7276 - acc: 0.0000e+00 - val_loss: 119587.4936 - val_acc: 2.8760e-04 Epoch 361/500 - 2s - loss: 120509.1903 - acc: 0.0000e+00 - val_loss: 119389.7586 - val_acc: 2.8760e-04 Epoch 362/500 - 2s - loss: 119403.7000 - acc: 0.0000e+00 - val_loss: 119171.7917 - val_acc: 0.0000e+00 Epoch 363/500 - 2s - loss: 120639.1823 - acc: 0.0000e+00 - val_loss: 119439.4396 - val_acc: 0.0000e+00 Epoch 364/500 - 2s - loss: 119797.8801 - acc: 0.0000e+00 - val_loss: 119160.3474 - val_acc: 0.0000e+00 Epoch 365/500 - 2s - loss: 118745.3519 - acc: 2.8760e-04 - val_loss: 117996.5159 - val_acc: 0.0000e+00 Epoch 366/500 - 2s - loss: 119136.0884 - acc: 0.0000e+00 - val_loss: 117812.1015 - val_acc: 0.0000e+00 Epoch 367/500 - 2s - loss: 118339.2565 - acc: 0.0000e+00 - val_loss: 117578.3424 - val_acc: 0.0000e+00 Epoch 368/500 - 2s - loss: 117649.0626 - acc: 0.0000e+00 - val_loss: 116664.5009 - val_acc: 2.8760e-04 Epoch 369/500 - 2s - loss: 117700.4578 - acc: 0.0000e+00 - val_loss: 116558.5614 - val_acc: 
2.8760e-04 Epoch 370/500 - 2s - loss: 117138.0143 - acc: 0.0000e+00 - val_loss: 116268.3454 - val_acc: 2.8760e-04 Epoch 371/500 - 2s - loss: 116837.7711 - acc: 0.0000e+00 - val_loss: 116110.7826 - val_acc: 2.8760e-04 Epoch 372/500 - 2s - loss: 116561.4277 - acc: 0.0000e+00 - val_loss: 115830.5783 - val_acc: 2.8760e-04 Epoch 373/500 - 2s - loss: 116927.2507 - acc: 0.0000e+00 - val_loss: 115710.2248 - val_acc: 2.8760e-04 Epoch 374/500 - 2s - loss: 116672.1328 - acc: 0.0000e+00 - val_loss: 115489.0198 - val_acc: 2.8760e-04 Epoch 375/500 - 2s - loss: 115780.3421 - acc: 0.0000e+00 - val_loss: 115262.0790 - val_acc: 2.8760e-04 Epoch 376/500 - 2s - loss: 115960.9932 - acc: 0.0000e+00 - val_loss: 115038.9085 - val_acc: 2.8760e-04 Epoch 377/500 - 2s - loss: 115530.5127 - acc: 0.0000e+00 - val_loss: 114813.6754 - val_acc: 0.0000e+00 Epoch 378/500 - 2s - loss: 115317.4174 - acc: 0.0000e+00 - val_loss: 114693.2722 - val_acc: 0.0000e+00 Epoch 379/500 - 3s - loss: 115141.8721 - acc: 0.0000e+00 - val_loss: 114441.8749 - val_acc: 0.0000e+00 Epoch 380/500 - 2s - loss: 115077.0597 - acc: 0.0000e+00 - val_loss: 114240.7362 - val_acc: 0.0000e+00 Epoch 381/500 - 2s - loss: 115287.9334 - acc: 0.0000e+00 - val_loss: 114066.8323 - val_acc: 0.0000e+00 Epoch 382/500 - 2s - loss: 114989.8037 - acc: 0.0000e+00 - val_loss: 113839.9696 - val_acc: 0.0000e+00 Epoch 383/500 - 2s - loss: 114200.0416 - acc: 0.0000e+00 - val_loss: 113677.0101 - val_acc: 0.0000e+00 Epoch 384/500 - 2s - loss: 114247.6751 - acc: 0.0000e+00 - val_loss: 113528.1965 - val_acc: 0.0000e+00 Epoch 385/500 - 2s - loss: 114054.0377 - acc: 0.0000e+00 - val_loss: 113264.1270 - val_acc: 0.0000e+00 Epoch 386/500 - 2s - loss: 113723.0670 - acc: 0.0000e+00 - val_loss: 113045.2744 - val_acc: 0.0000e+00 Epoch 387/500 - 2s - loss: 113786.5941 - acc: 0.0000e+00 - val_loss: 112855.6029 - val_acc: 0.0000e+00 Epoch 388/500 - 2s - loss: 113634.7628 - acc: 0.0000e+00 - val_loss: 112633.6267 - val_acc: 0.0000e+00 Epoch 389/500 - 2s - loss: 
113396.8655 - acc: 0.0000e+00 - val_loss: 112482.6460 - val_acc: 0.0000e+00 Epoch 390/500 - 2s - loss: 112991.9969 - acc: 0.0000e+00 - val_loss: 112256.1113 - val_acc: 0.0000e+00 Epoch 391/500 - 2s - loss: 112913.9521 - acc: 0.0000e+00 - val_loss: 112082.2194 - val_acc: 0.0000e+00 Epoch 392/500 - 2s - loss: 112492.2761 - acc: 0.0000e+00 - val_loss: 111849.5450 - val_acc: 0.0000e+00 Epoch 393/500 - 2s - loss: 112715.9312 - acc: 2.8760e-04 - val_loss: 111689.9044 - val_acc: 0.0000e+00 Epoch 394/500 - 2s - loss: 112070.0193 - acc: 0.0000e+00 - val_loss: 111487.3732 - val_acc: 0.0000e+00 Epoch 395/500 - 2s - loss: 111965.0013 - acc: 0.0000e+00 - val_loss: 111277.0022 - val_acc: 0.0000e+00 Epoch 396/500 - 2s - loss: 112032.1379 - acc: 2.8760e-04 - val_loss: 111080.6163 - val_acc: 0.0000e+00 Epoch 397/500 - 2s - loss: 111746.1851 - acc: 0.0000e+00 - val_loss: 110865.9358 - val_acc: 0.0000e+00 Epoch 398/500 - 2s - loss: 111350.7088 - acc: 0.0000e+00 - val_loss: 110683.5162 - val_acc: 0.0000e+00 Epoch 399/500 - 2s - loss: 111192.6004 - acc: 0.0000e+00 - val_loss: 110493.2994 - val_acc: 0.0000e+00 Epoch 400/500 - 2s - loss: 111064.0273 - acc: 0.0000e+00 - val_loss: 110349.7869 - val_acc: 0.0000e+00 Epoch 401/500 - 2s - loss: 111135.3876 - acc: 0.0000e+00 - val_loss: 110157.0301 - val_acc: 0.0000e+00 Epoch 402/500 - 2s - loss: 110886.2793 - acc: 0.0000e+00 - val_loss: 109948.8815 - val_acc: 0.0000e+00 Epoch 403/500 - 2s - loss: 111133.0543 - acc: 0.0000e+00 - val_loss: 109758.7219 - val_acc: 0.0000e+00 Epoch 404/500 - 2s - loss: 110534.4626 - acc: 0.0000e+00 - val_loss: 109576.6390 - val_acc: 0.0000e+00 Epoch 405/500 - 2s - loss: 109774.7329 - acc: 2.8760e-04 - val_loss: 109396.6508 - val_acc: 0.0000e+00 Epoch 406/500 - 2s - loss: 109774.2747 - acc: 0.0000e+00 - val_loss: 109144.6542 - val_acc: 0.0000e+00 Epoch 407/500 - 2s - loss: 110380.5870 - acc: 0.0000e+00 - val_loss: 108972.1907 - val_acc: 0.0000e+00 Epoch 408/500 - 2s - loss: 109792.3153 - acc: 0.0000e+00 - val_loss: 
108749.0949 - val_acc: 0.0000e+00 Epoch 409/500 - 3s - loss: 109318.5718 - acc: 0.0000e+00 - val_loss: 108578.2403 - val_acc: 0.0000e+00 Epoch 410/500 - 2s - loss: 109326.5530 - acc: 0.0000e+00 - val_loss: 108398.6258 - val_acc: 0.0000e+00 Epoch 411/500 - 2s - loss: 109079.9669 - acc: 0.0000e+00 - val_loss: 108180.8901 - val_acc: 0.0000e+00 Epoch 412/500 - 2s - loss: 108734.4508 - acc: 0.0000e+00 - val_loss: 107940.2394 - val_acc: 0.0000e+00 Epoch 413/500 - 2s - loss: 108726.4618 - acc: 2.8760e-04 - val_loss: 107790.0739 - val_acc: 0.0000e+00 Epoch 414/500 - 2s - loss: 108553.8092 - acc: 2.8760e-04 - val_loss: 107601.8967 - val_acc: 0.0000e+00 Epoch 415/500 - 2s - loss: 108274.0472 - acc: 2.8760e-04 - val_loss: 107391.1087 - val_acc: 0.0000e+00 Epoch 416/500 - 2s - loss: 108062.3303 - acc: 0.0000e+00 - val_loss: 107212.7808 - val_acc: 0.0000e+00 Epoch 417/500 - 2s - loss: 108198.9622 - acc: 0.0000e+00 - val_loss: 107011.9945 - val_acc: 0.0000e+00 Epoch 418/500 - 2s - loss: 108044.6150 - acc: 0.0000e+00 - val_loss: 106828.9488 - val_acc: 0.0000e+00 Epoch 419/500 - 2s - loss: 107435.8329 - acc: 0.0000e+00 - val_loss: 106676.6987 - val_acc: 0.0000e+00 Epoch 420/500 - 2s - loss: 107447.0995 - acc: 0.0000e+00 - val_loss: 106476.0243 - val_acc: 0.0000e+00 Epoch 421/500 - 2s - loss: 106990.7731 - acc: 0.0000e+00 - val_loss: 106316.3703 - val_acc: 0.0000e+00 Epoch 422/500 - 2s - loss: 106989.1432 - acc: 0.0000e+00 - val_loss: 106114.7194 - val_acc: 0.0000e+00 Epoch 423/500 - 2s - loss: 106396.0855 - acc: 0.0000e+00 - val_loss: 105915.4027 - val_acc: 0.0000e+00 Epoch 424/500 - 2s - loss: 106907.2003 - acc: 0.0000e+00 - val_loss: 105736.8322 - val_acc: 0.0000e+00 Epoch 425/500 - 2s - loss: 106558.9849 - acc: 0.0000e+00 - val_loss: 105543.9781 - val_acc: 0.0000e+00 Epoch 426/500 - 2s - loss: 106139.7156 - acc: 0.0000e+00 - val_loss: 105384.5317 - val_acc: 0.0000e+00 Epoch 427/500 - 2s - loss: 106073.7979 - acc: 0.0000e+00 - val_loss: 105137.7199 - val_acc: 0.0000e+00 Epoch 
428/500 - 2s - loss: 106076.5889 - acc: 2.8760e-04 - val_loss: 104977.6089 - val_acc: 0.0000e+00 Epoch 429/500 - 2s - loss: 105804.6027 - acc: 0.0000e+00 - val_loss: 104804.6289 - val_acc: 0.0000e+00 Epoch 430/500 - 2s - loss: 105256.3703 - acc: 0.0000e+00 - val_loss: 104595.1511 - val_acc: 0.0000e+00 Epoch 431/500 - 2s - loss: 105807.0633 - acc: 0.0000e+00 - val_loss: 104425.6364 - val_acc: 0.0000e+00 Epoch 432/500 - 2s - loss: 104712.9182 - acc: 0.0000e+00 - val_loss: 104193.5394 - val_acc: 0.0000e+00 Epoch 433/500 - 2s - loss: 104734.8768 - acc: 0.0000e+00 - val_loss: 104047.6493 - val_acc: 0.0000e+00 Epoch 434/500 - 2s - loss: 104429.0161 - acc: 0.0000e+00 - val_loss: 103818.8530 - val_acc: 0.0000e+00 Epoch 435/500 - 2s - loss: 104789.6414 - acc: 0.0000e+00 - val_loss: 103663.2740 - val_acc: 0.0000e+00 Epoch 436/500 - 2s - loss: 104433.4344 - acc: 0.0000e+00 - val_loss: 103498.8176 - val_acc: 0.0000e+00 Epoch 437/500 - 2s - loss: 104889.8699 - acc: 0.0000e+00 - val_loss: 103451.2628 - val_acc: 0.0000e+00 Epoch 438/500 - 2s - loss: 104864.0465 - acc: 0.0000e+00 - val_loss: 103255.0162 - val_acc: 0.0000e+00 Epoch 439/500 - 2s - loss: 104040.8685 - acc: 0.0000e+00 - val_loss: 103008.7451 - val_acc: 0.0000e+00 Epoch 440/500 - 2s - loss: 104036.8276 - acc: 0.0000e+00 - val_loss: 102763.1904 - val_acc: 0.0000e+00 Epoch 441/500 - 2s - loss: 103732.5937 - acc: 5.7521e-04 - val_loss: 102576.6484 - val_acc: 0.0000e+00 Epoch 442/500 - 2s - loss: 103316.5982 - acc: 2.8760e-04 - val_loss: 102413.2076 - val_acc: 0.0000e+00 Epoch 443/500 - 2s - loss: 102872.2242 - acc: 0.0000e+00 - val_loss: 102223.3422 - val_acc: 0.0000e+00 Epoch 444/500 - 2s - loss: 102570.9777 - acc: 0.0000e+00 - val_loss: 102027.9098 - val_acc: 0.0000e+00 Epoch 445/500 - 2s - loss: 102718.0920 - acc: 0.0000e+00 - val_loss: 101844.8713 - val_acc: 0.0000e+00 Epoch 446/500 - 2s - loss: 102045.5794 - acc: 0.0000e+00 - val_loss: 101680.7750 - val_acc: 0.0000e+00 Epoch 447/500 - 2s - loss: 102134.2523 - acc: 
0.0000e+00 - val_loss: 101477.5510 - val_acc: 0.0000e+00 Epoch 448/500 - 2s - loss: 102052.5260 - acc: 0.0000e+00 - val_loss: 101285.5288 - val_acc: 0.0000e+00 Epoch 449/500 - 2s - loss: 101594.2875 - acc: 0.0000e+00 - val_loss: 101109.7019 - val_acc: 0.0000e+00 Epoch 450/500 - 2s - loss: 101836.7719 - acc: 0.0000e+00 - val_loss: 100943.6652 - val_acc: 0.0000e+00 Epoch 451/500 - 2s - loss: 101650.6314 - acc: 0.0000e+00 - val_loss: 100745.4184 - val_acc: 0.0000e+00 Epoch 452/500 - 2s - loss: 101238.4699 - acc: 0.0000e+00 - val_loss: 100550.1520 - val_acc: 0.0000e+00 Epoch 453/500 - 2s - loss: 101322.3064 - acc: 0.0000e+00 - val_loss: 100375.6924 - val_acc: 0.0000e+00 Epoch 454/500 - 2s - loss: 101109.8404 - acc: 2.8760e-04 - val_loss: 100223.6442 - val_acc: 0.0000e+00 Epoch 455/500 - 2s - loss: 100752.8784 - acc: 0.0000e+00 - val_loss: 100033.8470 - val_acc: 0.0000e+00 Epoch 456/500 - 2s - loss: 101362.2476 - acc: 0.0000e+00 - val_loss: 99922.4070 - val_acc: 0.0000e+00 Epoch 457/500 - 2s - loss: 100678.4181 - acc: 0.0000e+00 - val_loss: 99667.7510 - val_acc: 0.0000e+00 Epoch 458/500 - 2s - loss: 100373.5132 - acc: 0.0000e+00 - val_loss: 99564.4725 - val_acc: 0.0000e+00 Epoch 459/500 - 2s - loss: 100466.4105 - acc: 0.0000e+00 - val_loss: 99343.9122 - val_acc: 0.0000e+00 Epoch 460/500 - 2s - loss: 100088.5328 - acc: 0.0000e+00 - val_loss: 99202.9573 - val_acc: 0.0000e+00 Epoch 461/500 - 2s - loss: 99626.3508 - acc: 0.0000e+00 - val_loss: 98993.2318 - val_acc: 0.0000e+00 Epoch 462/500 - 2s - loss: 99258.5677 - acc: 2.8760e-04 - val_loss: 98808.4174 - val_acc: 0.0000e+00 Epoch 463/500 - 2s - loss: 99332.5237 - acc: 0.0000e+00 - val_loss: 98597.1025 - val_acc: 0.0000e+00 Epoch 464/500 - 2s - loss: 99095.4014 - acc: 0.0000e+00 - val_loss: 98432.5474 - val_acc: 0.0000e+00 Epoch 465/500 - 2s - loss: 99226.5777 - acc: 0.0000e+00 - val_loss: 98223.7187 - val_acc: 0.0000e+00 Epoch 466/500 - 2s - loss: 98936.5366 - acc: 2.8760e-04 - val_loss: 98077.4166 - val_acc: 0.0000e+00 
Epoch 467/500 - 2s - loss: 98699.9250 - acc: 0.0000e+00 - val_loss: 97904.7124 - val_acc: 0.0000e+00 Epoch 468/500 - 3s - loss: 98909.5774 - acc: 0.0000e+00 - val_loss: 97709.9274 - val_acc: 0.0000e+00 Epoch 469/500 - 3s - loss: 98431.2522 - acc: 0.0000e+00 - val_loss: 97502.6008 - val_acc: 0.0000e+00 Epoch 470/500 - 2s - loss: 98540.8342 - acc: 0.0000e+00 - val_loss: 97381.7798 - val_acc: 0.0000e+00 Epoch 471/500 - 2s - loss: 97969.8392 - acc: 0.0000e+00 - val_loss: 97178.9770 - val_acc: 0.0000e+00 Epoch 472/500 - 2s - loss: 97845.4831 - acc: 0.0000e+00 - val_loss: 97032.6988 - val_acc: 0.0000e+00 Epoch 473/500 - 2s - loss: 98129.0703 - acc: 2.8760e-04 - val_loss: 96842.5350 - val_acc: 0.0000e+00 Epoch 474/500 - 2s - loss: 97265.9202 - acc: 0.0000e+00 - val_loss: 96669.7758 - val_acc: 0.0000e+00 Epoch 475/500 - 2s - loss: 97219.3831 - acc: 0.0000e+00 - val_loss: 96507.1375 - val_acc: 0.0000e+00 Epoch 476/500 - 2s - loss: 96995.6086 - acc: 0.0000e+00 - val_loss: 96292.6584 - val_acc: 0.0000e+00 Epoch 477/500 - 2s - loss: 97047.4388 - acc: 0.0000e+00 - val_loss: 96151.5447 - val_acc: 0.0000e+00 Epoch 478/500 - 2s - loss: 96775.5720 - acc: 0.0000e+00 - val_loss: 95950.7381 - val_acc: 0.0000e+00 Epoch 479/500 - 2s - loss: 96470.9253 - acc: 0.0000e+00 - val_loss: 95802.9676 - val_acc: 0.0000e+00 Epoch 480/500 - 2s - loss: 96606.6446 - acc: 2.8760e-04 - val_loss: 95629.9811 - val_acc: 0.0000e+00 Epoch 481/500 - 2s - loss: 96416.2753 - acc: 0.0000e+00 - val_loss: 95434.3204 - val_acc: 0.0000e+00 Epoch 482/500 - 2s - loss: 96372.4960 - acc: 2.8760e-04 - val_loss: 95266.8583 - val_acc: 0.0000e+00 Epoch 483/500 - 2s - loss: 96269.1918 - acc: 2.8760e-04 - val_loss: 95098.4984 - val_acc: 0.0000e+00 Epoch 484/500 - 3s - loss: 96293.3536 - acc: 0.0000e+00 - val_loss: 94966.5952 - val_acc: 0.0000e+00 Epoch 485/500 - 2s - loss: 96376.9943 - acc: 0.0000e+00 - val_loss: 94873.2154 - val_acc: 0.0000e+00 Epoch 486/500 - 2s - loss: 96008.5653 - acc: 0.0000e+00 - val_loss: 94760.8468 - 
val_acc: 0.0000e+00 Epoch 487/500 - 2s - loss: 95918.8077 - acc: 0.0000e+00 - val_loss: 94555.3004 - val_acc: 0.0000e+00 Epoch 488/500 - 2s - loss: 95363.1538 - acc: 0.0000e+00 - val_loss: 94336.3366 - val_acc: 0.0000e+00 Epoch 489/500 - 2s - loss: 95038.5756 - acc: 0.0000e+00 - val_loss: 94161.1894 - val_acc: 0.0000e+00 Epoch 490/500 - 2s - loss: 94864.6446 - acc: 0.0000e+00 - val_loss: 93962.7288 - val_acc: 0.0000e+00 Epoch 491/500 - 2s - loss: 94814.0764 - acc: 0.0000e+00 - val_loss: 93803.0049 - val_acc: 0.0000e+00 Epoch 492/500 - 2s - loss: 94703.7320 - acc: 0.0000e+00 - val_loss: 93614.7775 - val_acc: 0.0000e+00 Epoch 493/500 - 2s - loss: 94131.7050 - acc: 0.0000e+00 - val_loss: 93449.1868 - val_acc: 0.0000e+00 Epoch 494/500 - 2s - loss: 94446.0429 - acc: 2.8760e-04 - val_loss: 93351.3660 - val_acc: 0.0000e+00 Epoch 495/500 - 2s - loss: 93959.9684 - acc: 0.0000e+00 - val_loss: 93054.1864 - val_acc: 0.0000e+00 Epoch 496/500 - 2s - loss: 93791.6487 - acc: 0.0000e+00 - val_loss: 92952.5649 - val_acc: 0.0000e+00 Epoch 497/500 - 2s - loss: 93524.1133 - acc: 0.0000e+00 - val_loss: 92721.0337 - val_acc: 0.0000e+00 Epoch 498/500 - 2s - loss: 93566.1991 - acc: 0.0000e+00 - val_loss: 92584.3074 - val_acc: 0.0000e+00 Epoch 499/500 - 2s - loss: 93403.5945 - acc: 0.0000e+00 - val_loss: 92405.5947 - val_acc: 0.0000e+00 Epoch 500/500 - 2s - loss: 93290.7362 - acc: 0.0000e+00 - val_loss: 92233.8541 - val_acc: 0.0000e+00
# Draw the per-epoch training and validation loss curves stored in history.history.
for _history_key, _curve_label in (('loss', 'train loss'), ('val_loss', 'test val_loss')):
    plt.plot(history.history[_history_key], label=_curve_label)
plt.legend()
plt.show()
# Run the fitted model on the test inputs.
y_pred_lstm_imputed = model.predict(X_test)
# Overlay targets (as a single column) and predictions on the same axes;
# both are plotted before any inverse scaling is applied.
plt.plot(Y_test.reshape((-1, 1)))
plt.plot(y_pred_lstm_imputed)
[<matplotlib.lines.Line2D at 0x1e54d899630>]
# Overlay of test targets and LSTM predictions, with a title.
plt.plot(Y_test.reshape((-1, 1)))
plt.plot(y_pred_lstm_imputed)
plt.title('LSTM predicted vs actual values')
# Axis labels and legend were left disabled by the author:
#plt.xlabel('Actual Price')
#plt.ylabel('Predicted Price')
#plt.legend(Y_test,y_pred_lstm_imputed)
plt.show()
# Sanity check: shapes of targets, inputs and predictions, printed on one line.
print(*(arr.shape for arr in (Y_test, X_test, y_pred_lstm_imputed)))
(71, 1) (71, 1, 10) (71, 1)
Check and update the shape of X_test. Scale X_train and X_test using the correct (2-D dataframe) shape.
# Undo the scaling on the predictions.
# Flatten X_test from 3-D to 2-D by keeping its first and last axes.
X_test = X_test.reshape(X_test.shape[0], X_test.shape[2])
# Put the prediction in column 0 and the features after it, so the array has the
# column layout that scaler_df_imputed.inverse_transform is given below.
y_pred_lstm_imputed = np.concatenate([y_pred_lstm_imputed, X_test], axis=1)
print(X_test.shape, y_pred_lstm_imputed.shape, Y_test.shape)
# Map the stacked array back to the original value range.
y_pred_lstm_imputed = scaler_df_imputed.inverse_transform(y_pred_lstm_imputed)
print(X_test.shape, y_pred_lstm_imputed.shape, Y_test.shape)
# Keep only the first column (the prediction) so it can be compared with Y_test.
y_pred_lstm_imputed = y_pred_lstm_imputed[:, :1]
(71, 10) (71, 11) (71, 1) (71, 10) (71, 11) (71, 1)
# Rebuild the stacked layout for the actual targets (target in column 0, features
# after it) and undo the scaling in the same way as for the predictions.
Y_test = scaler_df_imputed.inverse_transform(np.concatenate([Y_test, X_test], axis=1))
# Keep only the first column: the inverse-transformed actual values.
Y_test = Y_test[:, :1]
print(X_test.shape, y_pred_lstm_imputed.shape, Y_test.shape)
(71, 10) (71, 1) (71, 1)
# Mean squared error of the inverse-transformed predictions against the actual values.
mse = mean_squared_error(y_true=Y_test, y_pred=y_pred_lstm_imputed)
print('Means Square Error between Y_test adn prediction values is :', mse)
Means Square Error between Y_test adn prediction values is : 982416894554.094
# Compare the final row of the predictions with the final row of the actual values.
_last_predicted, _last_actual = y_pred_lstm_imputed[-1], Y_test[-1]
print('predicted- {} ; Actual - {}'.format(_last_predicted, _last_actual))
predicted- [297487.70399435] ; Actual - [1460579.96104185]
# Coefficient of determination of the LSTM predictions on the test window.
r2score = r2_score(y_true=Y_test, y_pred=y_pred_lstm_imputed)
print('R2 score between Y_test adn prediction value is :', r2score)
R2 score between Y_test adn prediction value is : -45.0776194785916
# Print the layer-by-layer architecture and parameter counts of the fitted model.
model.summary()
_________________________________________________________________ Layer (type) Output Shape Param # ================================================================= lstm_14 (LSTM) (None, 512) 1071104 _________________________________________________________________ dropout_9 (Dropout) (None, 512) 0 _________________________________________________________________ dense_4 (Dense) (None, 1) 513 ================================================================= Total params: 1,071,617 Trainable params: 1,071,617 Non-trainable params: 0 _________________________________________________________________
# from sklearn import linear_model
# from sklearn.metrics import mean_squared_error, r2_score
Plot the highest-variance features against the target label 'Value_SP500_REAL_PRICE_MONTH' to understand their linear correlation.
This is to find collinearity between the features and the prediction line for the selected set of features. The predicted line is the linear regression model's attempt to fit through the feature data points.
The dataset here is very imbalanced and non-linear, which makes it difficult to fit a line with optimum coefficients and intercept.
# Feature names returned by Print_PCAfeatures_graph for the imputed frame.
# NOTE(review): the helper is defined elsewhere; -10 presumably selects ten features — confirm.
max_var_cols1 = Print_PCAfeatures_graph(df_imputed, a, -10)
# One regression-scatter panel per selected feature against the target price.
sns.pairplot(
    data=df_imputed,
    x_vars=max_var_cols1,
    y_vars=['Value_SP500_REAL_PRICE_MONTH'],
    kind='reg',
    height=10,
    aspect=0.5,
)
<Figure size 3000x2000 with 0 Axes>
<seaborn.axisgrid.PairGrid at 0x1e55f6b4f28>
# Feature names returned by Print_PCAfeatures_graph for the interpolated frame (argument -10).
# NOTE(review): the helper is defined elsewhere; confirm what the sign of the last argument selects.
max_var_cols2 = Print_PCAfeatures_graph(df_interpolate, b, -10)
print(max_var_cols2)
# Seaborn regression scatterplots: each selected feature vs. the target label.
sns.pairplot(
    data=df_interpolate,
    x_vars=max_var_cols2,
    y_vars=['Value_SP500_REAL_PRICE_MONTH'],
    kind='reg',
    height=10,
    aspect=0.5,
)
<Figure size 3000x2000 with 0 Axes>
['Value_SP500_EARNINGS_YIELD_MONTH', 'Value_SP500_PBV_RATIO_YEAR', 'Value_SP500_PE_RATIO_MONTH', 'Value_SP500_PBV_RATIO_QUARTER', 'Value_SP500_REAL_EARNINGS_GROWTH_QUARTER', 'Value_SP500_REAL_SALES_YEAR', 'Value_SHILLER_PE_RATIO_YEAR', 'Value_SP500_EARNINGS_YEAR', 'Value_SP500_DIV_YIELD_MONTH', 'Value_SP500_REAL_SALES_GROWTH_YEAR']
<seaborn.axisgrid.PairGrid at 0x1e560dd9278>
# Feature names returned by Print_PCAfeatures_graph for the interpolated frame (argument 10).
# NOTE(review): the helper is defined elsewhere; confirm what the sign of the last argument selects.
max_var_cols3 = Print_PCAfeatures_graph(df_interpolate, b, 10)
print(max_var_cols3)
# Seaborn regression scatterplots: each selected feature vs. the target label.
sns.pairplot(
    data=df_interpolate,
    x_vars=max_var_cols3,
    y_vars=['Value_SP500_REAL_PRICE_MONTH'],
    kind='reg',
    height=10,
    aspect=0.5,
)
<Figure size 3000x2000 with 0 Axes>
['Value_SP500_DIV_GROWTH_YEAR', 'Value_SP500_DIV_YEAR', 'Value_SP500_REAL_SALES_GROWTH_QUARTER', 'Value_SP500_SALES_YEAR', 'Value_SP500_SALES_QUARTER', 'Value_SP500_SALES_GROWTH_QUARTER', 'Value_SP500_EARNINGS_GROWTH_QUARTER', 'Value_SP500_REAL_EARNINGS_GROWTH_YEAR', 'Value_SHILLER_PE_RATIO_MONTH', 'Value_SP500_PSR_YEAR']
<seaborn.axisgrid.PairGrid at 0x1e5708805c0>
#Split data into train and test
#X_train, Y_train, X_test, Y_test = Create_Training_Test_Dataset(df_imputed,0.8,Linear_regr='True')
# Standardised copy of the PCA-selected frame. It is not used by the split below,
# which slices the unscaled df_PCA_features.
# NOTE(review): the scaler is fitted on all rows (train + test); if the scaled frame
# is ever used for modelling, fit it on the training rows only to avoid leakage.
scaler = StandardScaler()
scaled_interpolated = scaler.fit_transform(df_PCA_features.values)
df_scaled_interpolated = pd.DataFrame(scaled_interpolated,index=df_PCA_features.index,columns = df_PCA_features.columns)
# Split the rows chronologically: first 80% for training, remaining 20% for testing.
split_percent = 0.8
train_size = int(len(df_PCA_features) * split_percent)
test_size = len(df_PCA_features) - train_size
print('Training and Test dataset is of size {} & {}'.format(train_size,test_size))
#Slice the df into train and test df.
train = df_PCA_features.iloc[0:train_size,:]
# Take everything after the training rows. The slice used to be bounded by len(df),
# the length of a different variable, which would silently truncate the test set
# whenever that length is smaller than len(df_PCA_features).
test = df_PCA_features.iloc[train_size:,:]
print(train.shape, test.shape)
Training and Test dataset is of size 2838 & 710 (2838, 11) (710, 11)
# #Slice the df into train and test df.
# train = df_scaled_interpolated.iloc[0:train_size,:]
# test = df_scaled_interpolated.iloc[train_size:len(df),:]
# print(train.shape, test.shape)
# Training rows: every column except the target is a feature; the target column is the label.
temp_train = train.drop(columns=['Value_SP500_REAL_PRICE_MONTH'])
X_train = train.drop(columns=['Value_SP500_REAL_PRICE_MONTH'])
Y_train = train.loc[:, 'Value_SP500_REAL_PRICE_MONTH']
print('Features size of X_train and training target Y_train shape is {} & {}'.format(X_train.shape, Y_train.shape))
# Test rows: same feature/label separation.
X_test = test.drop(columns=['Value_SP500_REAL_PRICE_MONTH'])
Y_test = test.loc[:, 'Value_SP500_REAL_PRICE_MONTH']
print('Features size of X_test and Test target Y_test shape is {} & {}'.format(X_test.shape, Y_test.shape))
Features size of X_train and training target Y_train shape is (2838, 10) & (2838,) Features size of X_test and Test target Y_test shape is (710, 10) & (710,)
#np.array(temp_train[:]).reshape(-1,1)
#print((np.array(temp_train[:]).reshape(-1,2)).shape)
#from sklearn.linear_model import LinearRegression
# Ordinary least-squares model with scikit-learn's default settings.
regr = LinearRegression()
# Fit on the training features and training target.
regr.fit(X=X_train, y=Y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
normalize=False)
# Show the fitted intercept, then the coefficient vector, each on its own line.
print(regr.intercept_, regr.coef_, sep='\n')
137.11463021015638 [-3.89875853e+00 -5.67269650e-01 -5.46924799e+00 -6.41997252e+00 1.29380410e-01 2.46530686e+00 0.00000000e+00 -3.98730436e-02 1.55754077e-04 1.55754077e-04]
# Predict the target for the test features with the fitted linear model.
y_pred_inter_regr = regr.predict(X=X_test)
# Shapes of predictions, test features and test targets, printed on one line.
print(*(obj.shape for obj in (y_pred_inter_regr, X_test, Y_test)))
(710,) (710, 10) (710,)
# Column labels of the training features, in the order the model was fitted on.
feature_cols = X_train.columns
print('Coefficients of all feature variables in training dataset are: ')
# (feature name, fitted coefficient) pairs.
[(col_name, coef) for col_name, coef in zip(feature_cols, regr.coef_)]
Coefficients of all feature variables in training dataset are:
[('Value_SP500_DIV_YIELD_MONTH', -3.8987585275669585),
('Value_SP500_PE_RATIO_MONTH', -0.5672696500428746),
('Value_SHILLER_PE_RATIO_MONTH', -5.469247991877379),
('Value_SP500_EARNINGS_YIELD_MONTH', -6.41997252365831),
('Value_SP500_INFLADJ_MONTH', 0.12938040986247704),
('Value_SP500_EARNINGS_MONTH', 2.465306856497017),
('Value_SP500_PSR_QUARTER', 0.0),
('Value_SP500_SALES_QUARTER', -0.03987304360362631),
('Value_SP500_REAL_SALES_GROWTH_QUARTER', 0.00015575407657666528),
('Value_SP500_REAL_EARNINGS_GROWTH_QUARTER', 0.00015575407657666528)]
# Evaluate the linear regression on the test set.
# The coefficients
#print('Coefficients: \n', regr.coef_)
# The mean squared error: is always non-negative, and values closer to zero are better.
# Computed once and reused for the RMSE below.
mse_inter_regr = mean_squared_error(Y_test, y_pred_inter_regr)
print('Mean squared error: {}'.format(mse_inter_regr))
# The Root mean squared error: RMSE is popular than MSE, because RMSE is interpretable in the "y" units.
print('Root Mean squared error: {}'.format(np.sqrt(mse_inter_regr)))
# Explained variance score: 1 is perfect prediction
print('Variance R2 score: {}'.format(r2_score(Y_test, y_pred_inter_regr)))
print('intercept: ',regr.intercept_)
# score is a method and must be called; printing regr.score itself only shows the
# bound-method repr. It is evaluated on the same test data as the metrics above.
print('Score : ',regr.score(X_test, Y_test))
Mean squared error: 1076172.6990871658
Root Mean squared error: 1037.3874392372243
Variance R2 score: -1.9382920775190278
intercept: 137.11463021015638
Score : <bound method RegressorMixin.score of LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
normalize=False)>
#Reshape prediction and test target into column vectors, then put the prediction in
#front of X_test to get the original column layout of the test dataset.
y_pred_inter_regr = y_pred_inter_regr[:,np.newaxis]
# Y_test is a pandas Series at this point. Indexing a Series with [:, np.newaxis] is
# multi-dimensional indexing, which newer pandas versions reject, so convert it to
# an ndarray first. The result is the same (n, 1) array.
Y_test = np.asarray(Y_test)[:,np.newaxis]
print(X_train.shape,X_test.shape,y_pred_inter_regr.shape,Y_test.shape)
y_pred_inter_regr = concatenate((y_pred_inter_regr, X_test), axis=1)
print(X_test.shape,y_pred_inter_regr.shape,Y_test.shape)
(2838, 10) (710, 10) (710, 1) (710, 1) (710, 10) (710, 11) (710, 1)
# # Plot outputs
# for col in feature_cols:
# plt.scatter(X_test[col], Y_test)
# #plt.plot(X_test[col], y_pred_imputed_regr)
# #print(col)
# plt.xticks(())
# plt.yticks(())
# plt.show()
# Column labels of the training features, in the order the model was fitted on.
feature_cols = X_train.columns
print('Coefficients of all feature variables in training dataset are: ')
# (feature name, fitted coefficient) pairs.
[(col_name, coef) for col_name, coef in zip(feature_cols, regr.coef_)]
Coefficients of all feature variables in training dataset are:
[('Value_SP500_DIV_YIELD_MONTH', -3.8987585275669585),
('Value_SP500_PE_RATIO_MONTH', -0.5672696500428746),
('Value_SHILLER_PE_RATIO_MONTH', -5.469247991877379),
('Value_SP500_EARNINGS_YIELD_MONTH', -6.41997252365831),
('Value_SP500_INFLADJ_MONTH', 0.12938040986247704),
('Value_SP500_EARNINGS_MONTH', 2.465306856497017),
('Value_SP500_PSR_QUARTER', 0.0),
('Value_SP500_SALES_QUARTER', -0.03987304360362631),
('Value_SP500_REAL_SALES_GROWTH_QUARTER', 0.00015575407657666528),
('Value_SP500_REAL_EARNINGS_GROWTH_QUARTER', 0.00015575407657666528)]
The MSE, RMSE and R2 scores are very poor, both with all features and with the PCA-selected features.
# from fbprophet import Prophet
# # plt.style.available
# plt.style.use("seaborn-whitegrid")
# import plotly.figure_factory as ff
# import plotly.graph_objs as go
# from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# import statsmodels.api as sm
# from scipy import stats
#Reset index of df to make it a column as prerequisite for fbProphet.
df_fb = df_imputed.reset_index()
df_interpolate_fb = df_interpolate.reset_index()
#Change dtype of Date column to datetime64 for fbProphet.
# Assign the whole column rather than using .loc[:, 'Date'] = ...: a whole-column
# assignment replaces the column, so the converted dtype is kept, whereas newer
# pandas versions write .loc[:, col] values into the existing column in place and
# may keep the old dtype.
df_interpolate_fb['Date'] = pd.to_datetime(df_interpolate_fb['Date'],format = '%Y%m%d')
# Two side-by-side time-series panels from the interpolated frame:
# real price on the left, yearly real sales growth on the right.
f, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))

# Left panel: interpolated S&P 500 real price over time.
ax1.plot(df_interpolate_fb['Date'], df_interpolate_fb['Value_SP500_REAL_PRICE_MONTH'], color='blue')
#ax1.plot(df_fb['Date'], df_fb['Value_SP500_REAL_PRICE_MONTH'],color='red')
ax1.set_title("S&P 500 Index price History")
ax1.set_xlabel("Date", fontsize=12)
ax1.set_ylabel("Real Price")

# Disabled alternative: original (imputed) price series on the left panel.
# ax1.plot(df_fb['Date'], df_fb['Value_SP500_REAL_PRICE_MONTH'],color='red')
# ax1.set_xlabel("Date", fontsize=12)
# ax1.set_ylabel("Original S&P 500 Real Index price")
# ax1.set_title("S&P 500 Index price History")

# Right panel: yearly real sales growth over time.
ax2.plot(df_interpolate_fb['Date'], df_interpolate_fb['Value_SP500_REAL_SALES_GROWTH_YEAR'], color='orange')
ax2.set_title("SP500_REAL_SALES_GROWTH_YEAR")
ax2.set_xlabel("Date", fontsize=12)
ax2.set_ylabel("SP500_REAL_SALES_GROWTH_YEAR")

plt.show()
# Data preparation for the fbprophet model: it takes a date/time-series column
# and a target column as input for fit and predict, named 'ds' and 'y'.
# Select and rename in one chained expression so a fresh frame is produced;
# renaming a column-subset in place can raise SettingWithCopyWarning.
df_interpolate_fb = (
    df_interpolate_fb[['Date', 'Value_SP500_REAL_PRICE_MONTH']]
    .rename(columns={'Date': 'ds', 'Value_SP500_REAL_PRICE_MONTH': 'y'})
)
# DataFrame.info() prints its report itself and returns None, so call it
# directly instead of inside print() (which would also print "None").
df_interpolate_fb.info()
print(df_interpolate_fb.shape)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3548 entries, 0 to 3547 Data columns (total 2 columns): ds 3548 non-null datetime64[ns] y 3548 non-null float64 dtypes: datetime64[ns](1), float64(1) memory usage: 55.5 KB None (3548, 2)
# Create a Prophet model with its default settings.
fb = Prophet()
# Fit it on the two-column ds/y frame prepared above. As the last expression
# of the cell, the object returned by fit() is what the notebook displays.
fb.fit(df_interpolate_fb)
INFO:fbprophet.forecaster:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this. C:\Users\Akshat\Anaconda3\lib\site-packages\pystan\misc.py:399: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
<fbprophet.forecaster.Prophet at 0x1e5515c5278>
# Extend the fitted model's date range by 30 periods.
future_dates = fb.make_future_dataframe(periods=30)
# Run the model over the extended date range.
future_price = fb.predict(future_dates)
# Show the point forecast and its lower/upper bounds for the last five rows.
future_price.tail(5)[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
| | ds | yhat | yhat_lower | yhat_upper |
|---|---|---|---|---|
| 3573 | 2019-01-26 | 2104.546587 | 1953.566674 | 2269.087231 |
| 3574 | 2019-01-27 | 2109.010824 | 1959.552406 | 2266.648516 |
| 3575 | 2019-01-28 | 2109.505319 | 1955.279627 | 2277.497805 |
| 3576 | 2019-01-29 | 2111.732478 | 1961.375006 | 2265.803894 |
| 3577 | 2019-01-30 | 2115.552914 | 1964.223491 | 2276.856778 |
import matplotlib.dates as mdates

# Anchor dates for the annotations, converted to matplotlib date numbers so
# they can be used as x coordinates on the date axis.
starting_date = dt.datetime(2018, 11, 30)
starting_date1 = mdates.date2num(starting_date)
trend_date = dt.datetime(2019, 1, 1)
trend_date1 = mdates.date2num(trend_date)
pointing_arrow = dt.datetime(2019, 1, 1)
pointing_arrow1 = mdates.date2num(pointing_arrow)

# Plot the forecast.
fig = fb.plot(future_price)
# Reuse the axes the forecast was drawn on. Calling fig.add_subplot(111) here
# only worked because matplotlib used to hand back the existing axes for
# identical arguments (with a deprecation warning); versions that always
# create a new axes would put the annotations on a separate, empty axes.
ax1 = fig.axes[0]
ax1.set_title("S&P500 Index Price Forecast", fontsize=16)
ax1.set_xlabel("Date", fontsize=12)
ax1.set_ylabel("Real Price", fontsize=12)

# Forecast initialization arrow
ax1.annotate(
    'Forecast \n Initialization',
    xy=(pointing_arrow1, 2100),
    xytext=(starting_date1, 2500),
    arrowprops=dict(facecolor='#ff7f50', shrink=0.1),
)

# Trend emphasis arrow
ax1.annotate(
    'Upward Trend',
    xy=(trend_date1, 2108),
    xytext=(trend_date1, 950),
    arrowprops=dict(facecolor='#6cff6c', shrink=0.1),
)

# Horizontal reference line at y=1260.
ax1.axhline(y=1260, color='b', linestyle='-')

plt.show()
C:\Users\Akshat\Anaconda3\lib\site-packages\matplotlib\cbook\deprecation.py:107: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance. In a future version, a new instance will always be created and returned. Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.
# Draw the component plots of the forecast produced by the fb model and keep
# the returned figure in fig2.
fig2 = fb.plot_components(future_price)
plt.show()
# Monthly predictions: a second Prophet model, built with
# changepoint_prior_scale=0.01, fitted on the same ds/y frame.
fbm = Prophet(changepoint_prior_scale=0.01)
fbm.fit(df_interpolate_fb)

# Extend the date range by 12 periods at monthly ('M') frequency and forecast.
future = fbm.make_future_dataframe(freq='M', periods=12)
fcst = fbm.predict(future)

# Plot the forecast and title the current axes.
fig = fbm.plot(fcst)
plt.title("Monthly Prediction \n 1 year time frame")
plt.show()
INFO:fbprophet.forecaster:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
# Draw the component plots of the monthly forecast produced by the fbm model
# and keep the returned figure in fig3.
fig3 = fbm.plot_components(fcst)
plt.show()